*-------------------------------------------------------------------------
*-------------------------------------------------------------------------
*--------------- APPENDIX *-----------------------------------------------
*-------------------------------------------------------------------------
*-------------------------------------------------------------------------


* Project root directory, stored in the global macro Truck and used as the
* prefix of most data and output paths below. The value already ends in a
* slash, so paths that append another slash after the macro contain two.
global Truck "/Users/yuanningliang/Truck/"

*-------------------------------------------------------------------------
* figure a2
// Data collected directly from BTS: National transportation statistics: Table 1-11: Number of U.S. Aircraft, Vehicles, Vessels, and Other Conveyances
// https://www.bts.gov/archive/publications/national_transportation_statistics/table_01_11

*-------------------------------------------------------------------------
* figure a3

* number of trucks / carriers inspected by yearmonth
* Pass 1: reduce every yearly raw inspection file to national monthly counts.
foreach yr of numlist 1989/1994 1996/2018 {
	insheet using "InspectionData/Insp_Pub_`yr'.csv", clear

	* insp_date is handled as a yyyymmdd number and split arithmetically
	gen year = floor(insp_date/10000)
	gen month = floor(mod(insp_date, 10000)/100)
	gen day = mod(insp_date, 100)
	keep year month day inspection_id dot_number // no duplicates

	* every remaining row counts as one inspected truck
	gen nbr_truck = 1
	gen nbr_carrier = 1

	* step 1: one row per carrier-month (trucks summed, carrier flag stays 1)
	collapse (sum) nbr_truck (mean) nbr_carrier, by(year month dot_number)
	* step 2: add up trucks and carriers within each month
	collapse (sum) nbr_truck nbr_carrier, by(year month)

	save "InspectionData/Insp_`yr'_count.dta", replace
}

* Pass 2: stack the yearly count files; the appended files are deleted as we go
use "InspectionData/Insp_1989_count.dta", clear
foreach yr of numlist 1990/1994 1996/2018 {   // inspection time info
	append using "InspectionData/Insp_`yr'_count.dta"
	erase "InspectionData/Insp_`yr'_count.dta"
}
save "InspectionData/Insp_count.dta", replace   // national by yearmonth

* graphs
* Yearly totals of inspections and inspected carriers, plus inspections
* normalised by the census count merged in below.
use "InspectionData/Insp_count.dta", clear

* rescale the monthly counts first, then add them up within each year
replace nbr_truck = nbr_truck/10^6   // in millions
replace nbr_carrier = nbr_carrier/10^3   // in thousands
collapse (sum) nbr_carrier nbr_truck , by(year)

merge m:1 year using "CensusData/census_op_drv_veh"  , nogen

* the ratio is taken while nbr_truck is still in millions ...
gen nbr_truck_norm = nbr_truck / nbr_tot_pwr_clean_
* ... and only then is nbr_truck moved to thousands for the left axis
replace nbr_truck = nbr_truck*10^3

twoway  (connected nbr_truck year, m(Oh)) ///
		(connected nbr_truck_norm year, m(Dh) lpattern(longdash) yaxis(2)) ///
		if inrange(year, 1996, 2017), ///
		xtitle("") xlabel(1996(4)2017) ylabel(.3(.1)1, axis(2)) ///
		ytitle("Number of Inspections, in Thousands")  ytitle("Number of Inspections per Truck", axis(2)) ///
		legend(order(1 "Number of Total Inspections" 2 "Number of Inspections per Truck") c(1) pos(6)) ///
		graphregion(color(white)) xsize(6)
graph export "/Users/yuanningliang/Liang Dropbox/Yuanning Liang/Truck/outputs/SummStat/Graphs/pct_truck_insp.eps", replace




*-------------------------------------------------------------------------
* figure a4, a5

*** days of inspection per year  
* Builds two commuting-zone maps from fixed-facility inspections:
*   (1) average number of days per year with at least one inspection
*   (2) number of distinct inspection locations
use "InspectionData/Insp_by_year/Insp_100pct.dta", clear

gen inspection = 1

* merge in cz
* (forward slashes throughout: they are valid on every platform and match the
*  other paths in this file)
merge m:1 insp_countyfips using "$Truck/processed_data/Geo_files/cz_eqv_fips.dta", nogen keep(master match) keepusing(CommutingZoneID2000)

keep if insp_facility == "F"

preserve 

* days that have an inspection in one year: one row per cz-state-date
* NOTE(review): the by() list has no station identifier (the station count
* further down groups by location); confirm that cz-state is the intended unit.
gcollapse (min) inspection, by(CommutingZoneID2000 insp_state insp_year insp_date)

* total nbr of inspection days in one year
gcollapse (sum) inspection, by(CommutingZoneID2000 insp_state insp_year)

* average over the remaining cz-state-year rows within a cz
gcollapse (mean) inspection, by(CommutingZoneID2000)

rename CommutingZoneID2000 cz

maptile inspection, geo(cz2000) stateoutline(vthin) cutvalues(30 60 90 120 180 240) fcolor(Purples) ndfcolor(gs5%10) legdecimals(0) twopt(legend(order(8 "> 240" 7 "180 - 240" 6 "120 - 180" 5 "90 - 120" 4 "60 - 90" 3 "30 - 60" 2 "< 30" 1 "No inspection")))

graph export "$Truck/outputs/Log/rawplot/map_cz_insp_per_station.eps", replace

restore

* # of stations
preserve 

* one row per cz-state-location
collapse (sum) inspection, by(CommutingZoneID2000 insp_state location)

* number of such rows per cz, then keep a single row per cz
bysort CommutingZoneID2000: gen nbr_station = _N
duplicates drop CommutingZoneID2000, force

rename CommutingZoneID2000 cz

maptile nbr_station, geo(cz2000) stateoutline(vthin) n(5) fcolor(Blues) ndfcolor(white)
graph export "$Truck/outputs/Log/rawplot/map_cz_station.eps", replace

restore



*---------------------------------------------------------------------------
* figure a6
*number of crash across years
* Plots yearly crash counts (left axis) against 10% of total VMT (right axis).
use "$Truck/outputs/Log/plots/figures_Sep2019/figures", clear

* the input file has generic column names; give the used ones readable names
rename var48 crash_year
rename var49 num_crash

rename var72 vmt
rename var73 vmt_truck

* rescale everything by 1,000
replace num_crash = num_crash/1000
replace vmt = vmt/1000
replace vmt_truck = vmt_truck/1000

* plotted VMT series: a fixed 10% share of total VMT
* NOTE(review): vmt_truck is renamed and rescaled above but not plotted; confirm
* that the 10% approximation is the intended series.
gen vmt_truck_est = vmt * 0.1

tw (connected num_crash crash_year if crash_year ~= 2018, lcolor(dkorange) mcolor(dkorange) msymbol(D)) ///
	(connected vmt_truck_est crash_year if crash_year ~= 2018, yaxis(2) ytitle("VMT (billions)",axis(2)) ylabel(150(75)600, axis(2)) lcolor(navy) lpattern(shortdash) mcolor(navy) msymbol(Oh)), ///
	ylabel(50(25)200) xlabel(1996(2)2018) ///
	xtitle("") ytitle("Number of crashes (thousands)") legend(order(1 "Number of crashes" 2 "VMT")) ///
	graphregion(color(white))

* forward slashes only: the original path mixed both separator styles
graph export "$Truck/outputs/Log/plots/summ_stats/nbr_crash.pdf", replace




*--------------------------------------------------------------------------------------------------
* Figure A7 (upper panel)

use "$Truck/processed_data/InspectionData/Insp_by_year/temp/Insp_100pct_VIN.dta",clear
	// this data contains all trucks (both VIN and licenses)

* only fixed station
keep if insp_facility == "F"

* drop impossible fips
gen insp_statefips = substr(insp_countyfips,1,2)
drop if insp_state == "US"
drop if inlist(insp_statefips, "03", "07", "14", "43", "52")

* new_VIN has problems replace it
* Blank out VIN entries that are placeholders rather than identifiers.
* (a) exact matches
foreach junk in "CARRIER" "DRIVER" "UNK" "NONE" "UNKNOWN" "NA" "N/A" ///
		"00000000000000000" "PENSKE" "FLEET" "INTERCHANGEABLE" "unknown" ///
		"0" "00" "000" "0000" "00000" "000000" "1" ///
		"XTRALEASE" "XTRA LEASE" "UNREADABLE" "UK" "SHIPPER" "SAME" "NOTFOUND" "MAPPER" {
	replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "`junk'"
}
* (b) two exact matches kept as literal statements because of their characters
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "//"
replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "``"
* (c) entries that contain one of these fragments anywhere
foreach frag in "RYDER" "9999999" {
	replace insp_unit_vehicle_id_number = "" if strpos(insp_unit_vehicle_id_number,"`frag'")>0
}

* Same treatment for the licence fields.
replace insp_unit_license_state = "" if insp_unit_license_state == "UK"
foreach junk in "UNKNOWN" "UK" "UNK" "9" "NONE" "TEMP" "NA" "N/A" "NEW" ///
		"PENDING" "VIN" "APPLIED" "INTRANSIT" "L9999" "PERMIT" {
	replace insp_unit_license = "" if insp_unit_license == "`junk'"
}
foreach frag in "NO" "T.O.P" "TOP" "ONLY" "TEMP" {
	replace insp_unit_license = "" if strpos(insp_unit_license,"`frag'")>0
}

* Vehicle id: positive group number from the VIN; when the VIN is blank,
* the negative of the (licence, licence state) group number.
egen newVIN2 = group(insp_unit_vehicle_id_number)
egen temp1 = group(insp_unit_license insp_unit_license_state)
replace newVIN2 = -temp1 if newVIN2 == .


* inspections per id / identifier combination, date and state
gcollapse (sum) inspection, by(newVIN2 new_license insp_unit_vehicle_id_number insp_unit_license_state insp_unit_license date insp_statefips) 

* append crash data
* Crash records carry no newVIN2, so each one borrows the id of the inspection
* records sharing its VIN (first) or its licence + licence state (second).
* The borrowed ids are stored as double: egen and gen create float variables by
* default, and a float holds integers exactly only up to 16,777,216, so larger
* group numbers would be rounded and different vehicles would share an id.

* vin
preserve 

use "$Truck/processed_data/CrashData/Crash_allVIN",clear

* crashes per VIN and date
gcollapse (sum) crash, by(insp_unit_vehicle_id_number date) 

tempfile crash_vin
save `crash_vin'

restore

append using `crash_vin'

* smallest id among the records with the same VIN; blank VINs get no id
bys insp_unit_vehicle_id_number: egen double crash_VIN = min(newVIN2)
replace crash_VIN = . if insp_unit_vehicle_id_number == ""

* license
preserve 

use "$Truck/processed_data/CrashData/Crash_alllic",clear

* crashes per licence, licence state and date
gcollapse (sum) crash, by(insp_unit_license insp_unit_license_state date) 

tempfile crash_lic
save `crash_lic'

restore

append using `crash_lic'

* smallest id among the records with the same licence + state; an id is only
* assigned when both parts are present
bys insp_unit_license insp_unit_license_state: egen double crash_lic = min(newVIN2)
replace crash_lic = . if insp_unit_license == ""
replace crash_lic = . if insp_unit_license_state == ""

* combine both id
* VIN-based id first, licence-based id as the fallback
gen double vehicle = crash_VIN 
replace vehicle = crash_lic if crash_VIN ==.
	// 4mil vehicle ==. because there are some crash that involves vehicles not in the inspection files
format vehicle %10.0f

* drop vehicles with only crash but no inspection
drop if vehicle == . 

* create unbalanced panel to tsset time
gcollapse (sum) inspection crash, by(vehicle date insp_statefips insp_unit_license insp_unit_license_state insp_unit_vehicle_id_number)

* look at the date to the most recent inspection
* Index each vehicle's records so that the L. and F. operators below step to
* the previous and next record of the same vehicle.
sort vehicle date
gegen seq_insp = group(date)  
* NOTE(review): egen rank() gives tied values their average rank by default, so
* records of one vehicle that share a date receive a fractional rank here --
* confirm this is the intended way of detecting same-day records.
bysort vehicle: egen rank_insp = rank(seq_insp)

* floor() turns the ranks into integers; records sharing a rank within a vehicle
* are then reduced to one (force: the dropped rows may differ in other columns).
* NOTE(review): which row survives depends on the sort order at this point, and
* the rank after a tie appears to be skipped, which would leave a gap in the
* panel index and make L./F. return missing there -- verify.
replace rank_insp = floor(rank_insp/1) 
duplicates drop vehicle rank_insp, force

tsset vehicle rank_insp

* drop vehicles with only crash but no inspection records 
* per-vehicle totals over the remaining records
bys vehicle: egen n_insp = sum(inspection)
bys vehicle: egen n_crash = sum(crash)
drop if n_insp == 0
drop if n_crash > 2 // <1%

* closest inspection after crash 
* days from a crash record to the next record; when the next record has no
* inspection, the record after that one is used instead
gen after_crash_insp = F.date - date if crash == 1
replace after_crash_insp = F2.date - date if F.inspection == 0 & crash == 1

* closest inspection before crash 
* same construction looking backwards
gen D_crash_insp = date - L.date if crash == 1
replace D_crash_insp = date - L2.date if L.inspection == 0 & crash == 1

compress
save "$Truck/processed_data/CrashData/crash_insp.dta", replace

* plot before and after
* Histogram of crashes by 30-day bin relative to the neighbouring inspections.
use "$Truck/processed_data/CrashData/crash_insp.dta", clear

*tsset vehicle rank_insp

* combine before and after
* discard records for which neither gap is available
drop if after_crash_insp == . & D_crash_insp == .

keep crash after_crash_insp D_crash_insp

* stack the two gaps: time 1 = gap to the following record, time 2 = gap to the
* preceding record
rename (after_crash_insp D_crash_insp) (crash_insp1 crash_insp2)

gen id = _n
reshape long crash_insp, i(id) j(time)

* gaps to the following record go on the negative side of the axis
replace crash_insp = -crash_insp if time == 1

* number of crash by D_crash_insp days
gcollapse (sum) crash, by(crash_insp)

* months of 1 year
* 30-day bins; a value sitting on a bin edge stays in the bin assigned first
gen group = .
forvalues m = 1/12 {
	replace group = `m'-1 if group == . & inrange(crash_insp, 30*(`m'-1), 30*`m')
	replace group = -`m' if group == . & inrange(crash_insp, -30*`m', -30*(`m'-1))
}

gcollapse (sum) crash, by(group)
replace crash = crash / 1000

lab define lb_group -12 "(-12,-11)" -8 "(-8,-7)" -4 "(-4,-3)" 0 "(0,1)" 4 "(4,5)" 8 "(8,9)" 11 "(11,12)" , replace 
lab values group lb_group

tw bar crash group if inrange(group,-12,11), color(gs10%50)  ///
	xtitle("Number of months since last inspection") ytitle("Number of Crashes (in 1,000)") ///
	xlabel(-12(4)8 11, valuelabel nogrid) ylabel(0(20)60, nogrid) legend(off) ///
	graphregion(color(white)) 
gr export "$Truck/r&r/new results/crash_month.pdf", replace


*--------------------------------------------------------------------------------------------------
* Figure A7 (lower panel)
use "$Truck/processed_data/InspectionData/Insp_by_year/new/reinsp_st_yr2.dta", clear

* attach the state-year crash rate
fmerge 1:1 state year using "$Truck/processed_data/CrashData/crash_rate.dta", nogen

* keep state codes that are non-zero, non-missing and at most 56
keep if state != 0 & state <= 56

* discard state-years where either series equals zero
drop if pct90 == 0 | crash_rate == 0

* binscatter: 
* flag relevant sample
reghdfe crash_rate pct90, a(state year)
gen sample = e(sample)

* binscatter var prep 
* residualise both series on year dummies and state fixed effects
reghdfe crash_rate i.year if sample == 1, a(state) res(resid_crash_rate)
reghdfe pct90 i.year if sample == 1, a(state) res(resid_pct90)

* add back the mean
* (constants are typed in by hand)
replace resid_pct90 = resid_pct90 + 0.599
replace resid_crash_rate = resid_crash_rate + 0.042


binscatter resid_crash_rate resid_pct90, reportreg n(10) ///
	xtitle("Probability of reinspection in a quarter") ///
	ytitle("Crash per million VMT") ///
	ylabel(0.035(0.005)0.05, nogrid)  ///
	mcol(blue) lcol(teal) xsize(4.5) ///
	text(0.047 0.65 "slope = -0.025***", size(large)) ///
	text(0.046 0.665 "(0.008)", size(large)) ///
	graphregion(col(white))
gr export "$Truck/outputs/Log/rawplot/crash_reinsp2.pdf", replace





*-------------------------------------------------------------------------------
* figure a8 and table a4:c1
*** decal vs "just missed decal" trucks 

* process inspection raw data to get registration_date 
* Output: reg_date.dta with one row per raw inspection record, holding the
* registration date, the inspection date and the gap between them in days.
clear 

* step 1: pull the needed columns out of every yearly raw file
forvalues i = 1996/2018 {
	import delimited using "$Truck/processed_data/InspectionData/Insp_Pub_`i'.csv",clear 

	keep inspection_id insp_date registration_date insp_level_id

	* the inspection year is taken from the file name
	gen insp_year = `i'
	* non-numeric level codes become missing
	destring insp_level_id, force replace
	
	* replace: a yearly file left behind by an interrupted run must not stop a rerun
	save "$Truck/processed_data/InspectionData/temp/reg_`i'.dta", replace
}

* step 2: stack the yearly extracts
use "$Truck/processed_data/InspectionData/temp/reg_1996.dta", clear
foreach i of numlist 1997/2018 {  
	append using "$Truck/processed_data/InspectionData/temp/reg_`i'.dta"
}

* step 3: registration_date and insp_date are split arithmetically as yyyymmdd
* numbers and turned into Stata dates
gen reg_year = floor((registration_date/10000))
gen reg_month = floor((registration_date - reg_year*10000)/100)
gen reg_day = registration_date - reg_year*10000 - reg_month*100
gen reg_date = mdy(reg_month, reg_day, reg_year)

gen insp_month = floor((insp_date - insp_year*10000)/100)
gen insp_day = insp_date - insp_year*10000 - insp_month*100
gen inspection_date = mdy(insp_month, insp_day, insp_year)

* days from the inspection to the registration date (negative when the
* registration date is the earlier one)
gen repair_days = reg_date - inspection_date 

compress
* replace: without it every run after the first aborts because the file exists
save "$Truck/processed_data/InspectionData/reg_date.dta", replace

* step 4: remove the yearly extracts
forvalues i = 1996/2018 {
	erase "$Truck/processed_data/InspectionData/temp/reg_`i'.dta"
}


* load outcome analysis file
* Columns to read from the event-panel file (inspection side, then crash side).
local insp_list insp_countyfips event_time new_VIN new_group_id year month dayofweek post_insp ///
	flag_oos insp_m* insp_p* inspection inspection_id dot_number ///
	insp_unit_license insp_unit_license_state insp_unit_vehicle_id_number ///
	insp_year insp_month insp_day date insp_time ///
	viol_total oos_total driver_viol_total vehicle_viol_total
local crash_list crash crash_event_18hr crash_time event_id1 event_id2 event_id3 event_id4 

* event days -14 to 13 only, excluding events flagged crash_event_18hr
use `insp_list' `crash_list' using "$Truck/processed_data/EventData/division_100pct/Insp_Crash_100pct_outcome.dta" if inrange(event_time,-14,13) & crash_event_18hr ~=1 , clear

* merge in truck decal/violations info
fmerge m:1 inspection_id insp_year using "$Truck/processed_data/InspectionData/Insp_outcome.dta", keepusing(driver_oos_total vehicle_oos_total alcohol_control_sub_n drug_intrdctn_search_n size_weight_enf_n traffic_enf_n insp_unit_decal_n) nogen keep(match master)

* decal trucks 
keep if inlist(insp_unit_decal_n, 0, 1)
	* note: 99 or missing indicates missing

* merge in citation_ind 
* built on the side: the largest citation_ind recorded for each inspection
preserve 

use "$Truck/processed_data/InspectionData/Insp_Violation.dta", clear

keep if citation_ind != .
	* note: starts in year2000
gcollapse (max) citation_ind, by(inspection_id insp_year)

tempfile citation 
save `citation' 

restore 

fmerge m:1 inspection_id insp_year using `citation', keep(master match) nogen

* merge in registration date 
fmerge m:1 inspection_id insp_year using "$Truck/processed_data/InspectionData/reg_date.dta", keepusing(repair_days insp_level_id reg_date) nogen keep(master match)

* reduce file size 
drop insp_countyfips event_id1 event_id2 event_id3 event_id4
drop insp_year insp_month insp_day
drop driver_oos_total vehicle_oos_total

compress
save "$Truck/processed_data/EventData/division_100pct/decal_analysis.dta", replace

*** variable data prep done ***


use "$Truck/processed_data/EventData/division_100pct/decal_analysis.dta",clear 

* a negative gap means the registration date precedes the inspection; treat
* the gap as unknown
replace repair_days = . if repair_days < 0

* only keep insp level id = 1,5,6 
keep if insp_level_id == 1 | insp_level_id == 5 | insp_level_id == 6 

* treatment indicator: 1 = decal issued, 0 = no decal and no citation.
* The statement order matters: the decal line runs after the citation line,
* so the citation exclusion only removes no-decal inspections.
* (citation_ind is written out instead of relying on name abbreviation.)
gen treat_gp1 = 0 if insp_unit_decal_n == 0
replace treat_gp1 = . if citation_ind == 1
replace treat_gp1 = 1 if insp_unit_decal_n == 1

* drop trucks went to long repairs
replace treat_gp1 = . if repair_days > 0 & repair_days ~=. 

/* drop inspections during special event inspections + holidays */
if 1 {
	
	* insp_date is not among the columns read or merged into decal_analysis.dta,
	* so the calendar split below would stop with "variable not found".
	* When it is absent it is rebuilt from the panel day and the event-time offset.
	* NOTE(review): assumes date = inspection date + event_time in this panel --
	* confirm against the script that builds the outcome file.
	capture confirm variable insp_date, exact
	if _rc {
		gen insp_date = date - event_time
	}

	gen insp_year = year(insp_date)
	gen insp_month = month(insp_date)
	gen insp_day = day(insp_date)
	gen insp_dow = dow(insp_date)

	* NOTE(review): the flags below are defined from insp_date components but the
	* spans are matched on the panel variable date -- confirm that is intended.

	* 1. roadcheck 72 hours
	* anchor: the Tuesday falling on June 2-8, then the two following days
	gen roadcheck_day = 1 if insp_month == 6 & insp_day <= 8 & insp_day > 1 & insp_dow == 2

	gen date_roadcheck_day = date if roadcheck_day == 1
	bysort insp_year: egen date_roadcheck = max(date_roadcheck_day)

	replace roadcheck_day = 1 if date == date_roadcheck + 1 
	replace roadcheck_day = 1 if date == date_roadcheck + 2

	* 2. brake week
	* anchor: the Sunday falling on September 6-12, then seven days from it;
	* for 2017 the start is set by hand
	gen brakeweek_day = 1 if insp_month == 9 & insp_day > 5 & insp_day <= 12 & insp_dow == 0

	gen date_brakeweek_day = date if brakeweek_day == 1
	bysort insp_year: gegen date_brakeweek = max(date_brakeweek_day)
	replace date_brakeweek = mdy(9,7,2017) if insp_year == 2017

	replace brakeweek_day = 1 if inrange(date, date_brakeweek, date_brakeweek+6)

	* 3. brake day
	* the Wednesday falling on May 1-7
	gen brakecheck_day = 1 if insp_month == 5 & insp_day <= 7 & insp_dow == 3

	* 4. holidays 
	* dow(): 0 = Sunday, 1 = Monday, ..., 6 = Saturday
	gen holiday = 0
	replace holiday = 1 if insp_month == 1 & insp_day == 1 // New Year
	replace holiday = 1 if insp_month == 1 & insp_dow == 1 & insp_day >= 15 & insp_day <= 21  // MLK (3rd Monday)
	replace holiday = 1 if insp_month == 2 & insp_dow == 1 & insp_day >= 15 & insp_day <= 21  // Washington's Birthday (3rd Monday)
	replace holiday = 1 if insp_month == 5 & insp_dow == 1 & insp_day >= 25 & insp_day <= 31 // Memorial Day (last Monday)
	replace holiday = 1 if insp_month == 7 & insp_day==4 // Independence Day
	replace holiday = 1 if insp_month == 9 & insp_dow == 1 & insp_day >= 1 & insp_day <= 7 // Labor Day (1st Monday)
	replace holiday = 1 if insp_month == 10 & insp_dow == 1 & insp_day >= 8 & insp_day <= 14 // Columbus Day (2nd Monday)
	replace holiday = 1 if insp_month == 11 & insp_day == 11 // Veterans Day
	replace holiday = 1 if insp_month == 11 & insp_dow == 4 & insp_day >= 22 & insp_day <= 28 // Thanksgiving (4th Thursday)
	replace holiday = 1 if insp_month == 11 & insp_dow == 5 & insp_day >= 23 & insp_day <= 29 // Friday after Thanksgiving
	replace holiday = 1 if insp_month == 12 & insp_day == 24 // Christmas Eve
	replace holiday = 1 if insp_month == 12 & insp_day == 25 // Christmas Day

	* drop special event inspections + holiday
	drop if roadcheck_day == 1 | brakeweek_day == 1 | brakecheck_day == 1 | holiday == 1

}
* new_VIN has problems replace it
* Rebuild the vehicle id after blanking identifier entries that are placeholders.
if 1 {
	drop new_VIN 

	* VIN: exact placeholder values
	foreach junk in "CARRIER" "DRIVER" "UNK" "NONE" "UNKNOWN" "NA" "N/A" ///
			"00000000000000000" "PENSKE" "FLEET" "INTERCHANGEABLE" "unknown" ///
			"0" "00" "000" "0000" "00000" "000000" "1" ///
			"XTRALEASE" "XTRA LEASE" "UNREADABLE" "UK" "SHIPPER" "SAME" "NOTFOUND" "MAPPER" {
		replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "`junk'"
	}
	* VIN: two exact values kept as literal statements because of their characters
	replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "//"
	replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "``"
	* VIN: values containing one of these fragments anywhere
	foreach frag in "RYDER" "9999999" {
		replace insp_unit_vehicle_id_number = "" if strpos(insp_unit_vehicle_id_number,"`frag'")>0
	}

	* licence state and licence number
	replace insp_unit_license_state = "" if insp_unit_license_state == "UK"
	foreach junk in "UNKNOWN" "UK" "UNK" "9" "NONE" "TEMP" "NA" "N/A" "NEW" ///
			"PENDING" "VIN" "APPLIED" "INTRANSIT" "L9999" "PERMIT" {
		replace insp_unit_license = "" if insp_unit_license == "`junk'"
	}
	foreach frag in "NO" "T.O.P" "TOP" "ONLY" "TEMP" {
		replace insp_unit_license = "" if strpos(insp_unit_license,"`frag'")>0
	}

	* id: positive group number from the VIN; when the VIN is blank, the
	* negative of the (licence, licence state) group number
	egen new_VIN = group(insp_unit_vehicle_id_number)
	egen temp1 = group(insp_unit_license insp_unit_license_state)
	replace new_VIN = -temp1 if new_VIN == .
	drop temp1 
}

* Decal vs. no-decal difference-in-differences (static and dynamic).
* No preserve is issued here: the subset built below is never restored (the
* following steps load other files with "clear"), and a preserve left open
* makes any later preserve in the same run fail with "already preserved".

keep if treat_gp1 ~=. 

* select sample with no change in treatment status 
* a vehicle is kept only when all of its records share one treatment value
bys new_VIN: gegen avg_treat =  mean(treat_gp1)

keep if avg_treat == 1 | avg_treat == 0 
	// drop 40%
drop avg_treat 

* static DID, first with calendar fixed effects and then with date fixed effects
reghdfe crash post_insp##treat_gp1 insp_p insp_m if crash_event_18hr ~=1 , absorb(new_VIN year month dayofweek ) vce(cluster new_VIN )
reghdfe crash post_insp##treat_gp1 insp_p insp_m if crash_event_18hr ~=1 , absorb(new_VIN date) vce(cluster new_VIN )

* outcome mean in the estimation sample of the last regression
summ crash if e(sample)


*** dynamic DID
* Collect the two-day event-time indicators:
*   insp_p13_14, insp_p11_12, ..., insp_p3_4   (insp_p1_2 is left out)
*   insp_m0_1, insp_m2_3, ..., insp_m12_13
local insp_F
forvalues t = 14(-2)4{
local s = `t'-1
local insp_F `insp_F' insp_p`s'_`t'
}
forvalues t = 1(2)13 {
local s = `t'-1
local insp_F `insp_F' insp_m`s'_`t'
}

* switch the indicators off for the comparison group so that they act as
* treatment-by-event-time interactions
foreach var of varlist `insp_F' {
	replace `var' = 0 if treat_gp1 == 0 
}

reghdfe crash `insp_F' insp_p insp_m if crash_event_18hr ~=1, absorb(new_VIN date) vce(cluster new_VIN)


* plot
* Event-study figure drawn from estimates stored in revision_plots.dta.
use "$Truck/r&r/new results/revision_plots.dta",clear

* point estimate and the two bounds sit in generically named columns
rename (var18 var19 var20) (decal1 lb_decal1 ub_decal1)

* express all three series in percent of a hand-typed mean
local mean = 0.0000614
foreach v in decal1 lb_decal1 ub_decal1 {
	replace `v' = `v'/`mean' * 100
}

local color_treat "black"

local var decal1
local xaxis event_day
* shaded band on either side of the window, connected estimates, and a
* vertical reference line at 0
twoway (rarea lb_`var' ub_`var' `xaxis' if `xaxis' <= -4, color(`color_treat'%20) lcolor(bg)) ///
	(rarea lb_`var' ub_`var' `xaxis' if inrange(`xaxis',0,12), color(`color_treat'%20) lcolor(bg)) ///
	(connected `var' `xaxis' if inrange(`xaxis',-14,12), msize(medium) mcolor(`color_treat') lcolor(`color_treat') lpattern(solid) lwidth(medium)) ///
	(scatteri 80 0 -20 0 , recast(line) lp(solid) lcolor(gs10%50) lwidth(0.6)), ///
	yline(0, lp(dash) lcol(black%50)) ///
	xlabel(-14(2)12, nogrid) ylabel(-20(20)80, nogrid) legend(off) ///
	xtitle("day") ytitle("% change (day-2=0)") graphregion(color(white)) xsize(4.5)
	
graph export "$Truck/r&r/new results/decal1.pdf", replace	






*-------------------------------------------------------------------------
/* figure A9:Comparison of Trucks Inspected Today vs. Those Inspected Slightly Later */
* and table A4:C2-4

* delta = 30, 15 or 45
* d is the width in days of the "inspected now" window; the comparison window
* covers the next d days.
local insp_varlist date inspection new_VIN insp_time inspection_id insp_unit_license insp_unit_license_state insp_unit_vehicle_id_number 
local d = 15

use `insp_varlist' using "$Truck/processed_data/EventData/division_100pct/Insp_Crash_100pct_pre.dta" if inspection == 1 , clear nolabel

gen insp_date = date

* one row per inspection
gisid inspection_id insp_date 

* order each vehicle's inspections and index them for the lag operator
sort new_VIN insp_date insp_time
gegen seq_insp = group(insp_date insp_time inspection_id)  
bysort new_VIN: egen rank_insp = rank(seq_insp)
tsset new_VIN rank_insp

* days since the same vehicle's previous inspection
gen D_insp_date = insp_date - L.insp_date

* inspections with no previous one (missing gap) or a gap above 2d are removed
keep if D_insp_date <= 2*`d'

* define t&c groups
* 1 = gap of 1 to d days, 0 = gap of d+1 to 2d days, missing otherwise
gen treat_d`d' = cond(inrange(D_insp_date,1,`d'), 1, cond(inrange(D_insp_date,`d'+1,2*`d'), 0, .))

* select sample with no change in treatment status 
bys new_VIN: gegen avg_treat =  mean(treat_d`d')

keep if inlist(avg_treat, 0, 1)
	// drop >60%
drop avg_treat 


* find real shock date for the treatment group and placebo shock date for the control group
* treated: the inspection date itself; control: d days earlier
gen shock_date = cond(treat_d`d' == 1, date, cond(treat_d`d' == 0, date - `d', .))

* expand to full panel 
drop if shock_date == .

gisid inspection_id insp_date 

* 28 copies per inspection, numbered -14 to 13
expand 28

bys inspection_id insp_date: gen event_time = _n -15 

* calendar day of each panel row
drop date 
gen date = shock_date + event_time

tempfile insp
save `insp'

* step 3: match to crash data 
* Panel rows with a VIN are matched on the VIN first and on the licence second;
* rows without a VIN are matched on the licence only.

* VIN all exist
use `insp', clear 

drop if insp_unit_vehicle_id_number == ""

local varlist "report_date report_time crash_time crash fatalities injuries vehicles_in_accident"

merge m:1 date insp_unit_vehicle_id_number using "$Truck/processed_data/CrashData/Crash_allVIN", nogen keep(master match) keepusing(`varlist')

* set the VIN-matched values aside under a VIN suffix so that the licence merge
* can bring in the same column names
foreach var of local varlist {
	rename `var' `var'VIN
}

merge m:1 date insp_unit_license insp_unit_license_state using "$Truck/processed_data/CrashData/Crash_alllic", nogen keep(master match) keepusing(`varlist')

* the licence match only fills values the VIN match left missing; afterwards
* the original column names are restored
foreach var of local varlist {
	replace `var'VIN = `var' if `var'VIN == .
	drop `var'
	rename `var'VIN `var'
}

tempfile insp_crash_vin
save `insp_crash_vin'

* VIN all missing, use lic
use `insp', clear 

keep if insp_unit_vehicle_id_number == ""

local varlist "report_date report_time crash_time crash fatalities injuries vehicles_in_accident"

merge m:1 date insp_unit_license insp_unit_license_state using "$Truck/processed_data/CrashData/Crash_alllic", nogen keep(master match) keepusing(`varlist')

tempfile insp_crash_lic
save `insp_crash_lic'

* append two datasets
* the licence-only rows are still in memory; the VIN rows go underneath
append using `insp_crash_vin'

* unmatched panel days count as zero
local crash_var "crash fatalities injuries vehicles_in_accident"
foreach outcome of local crash_var {
	replace `outcome' = 0 if `outcome' == .
}

* event days 0 and later are "post"
gen post_insp = event_time >= 0

format date %td

* calendar parts of the panel day
gen year = year(date)
gen month = month(date)
gen day = day(date)
gen dayofweek = dow(date)

* outside event window insp indicator
* first and last inspection date present for each vehicle
gegen first_insp = min(insp_date), by(new_VIN)
gegen last_insp = max(insp_date), by(new_VIN)

* insp_p: this inspection is later than the vehicle's first one
* insp_m: this inspection is earlier than the vehicle's last one
gen insp_p = insp_date > first_insp
gen insp_m = insp_date < last_insp

gegen new_group_id = group(inspection_id shock_date)

* drop events within 3,6,12,18,24 hours of inspection
* only the 18-hour window is built: flag every event in which a crash time falls
* inside the 18 hours before the inspection time
local hr_18 = mdyhms(1,1,2000,18,0,0)-mdyhms(1,1,2000,0,0,0)

gen temp_crash_18hr = 1 if crash_time >= insp_time - `hr_18' & crash_time < insp_time
gegen crash_event_18hr = max(temp_crash_18hr), by(new_group_id) 

drop temp_crash*

tempfile delta`d' 
save `delta`d'' 


* static DID, first with calendar fixed effects and then with date fixed effects
reghdfe crash post_insp##treat_d`d' insp_p insp_m if crash_event_18hr ~=1, absorb(new_VIN year month dayofweek) vce(cluster new_VIN)
reghdfe crash post_insp##treat_d`d' insp_p insp_m if crash_event_18hr ~=1, absorb(new_VIN date) vce(cluster new_VIN)


* plot
* Event-study figure drawn from estimates stored in revision_plots.dta.
use "$Truck/r&r/new results/revision_plots.dta",clear

* point estimate and the two bounds sit in generically named columns
rename (var24 var25 var26) (current30 lb_current30 ub_current30)

* express all three series in percent of a hand-typed mean
local mean = 0.0000797
foreach v in current30 lb_current30 ub_current30 {
	replace `v' = `v'/`mean' * 100
}

local color_treat "black"

local var current30
local xaxis event_day
* shaded band on either side of the window, connected estimates, and a
* vertical reference line at 0
twoway (rarea lb_`var' ub_`var' `xaxis' if `xaxis' <= -4, color(`color_treat'%20) lcolor(bg)) ///
	(rarea lb_`var' ub_`var' `xaxis' if inrange(`xaxis',0,12), color(`color_treat'%20) lcolor(bg)) ///
	(connected `var' `xaxis' if inrange(`xaxis',-14,12), msize(medium) mcolor(`color_treat') lcolor(`color_treat') lpattern(solid) lwidth(medium)) ///
	(scatteri 120 0 -40 0 , recast(line) lp(solid) lcolor(gs10%50) lwidth(0.6)), ///
	yline(0, lp(dash) lcol(black%50)) ///
	xlabel(-14(2)12, nogrid) ylabel(-40(40)120, nogrid) legend(off) ///
	xtitle("day") ytitle("% change (day-2=0)") graphregion(color(white)) xsize(4.5)
	
graph export "$Truck/r&r/new results/current30.pdf", replace	



*-------------------------------------------------------------------------------------
* figure A10 and table A4:C5

/* find trucks inspected in 2017 */
* Treated: inspections carried out in 2017. Control: inspections from the other
* loaded years of trucks that have no 2017 inspection but whose carrier does.
local insp_varlist insp_unit_license insp_unit_license_state insp_unit_vehicle_id_number year month date dayofweek new_VIN new_group_id inspection dot_number inspection_id insp_year insp_start_time

use `insp_varlist' using "$Truck/processed_data/EventData/division_100pct/Insp_Crash_100pct_pre.dta" if insp_year >= 2016 & inspection == 1 , clear nolabel

drop if dot_number == .

* merge in carrier info (driver, vehicle, cargo) 
* only matched rows are kept: this removes exited carriers (inspection side
* only) and carriers that appear in the census alone
fmerge m:1 dot_number using "$Truck/processed_data/CensusData/carrier_census", keepusing(tot_trucks tot_buses tot_pwr fleetsize tot_cars inter_drs intra_drs avg_tld tot_drs cdl_drs drs_lt100 drs_gt100) keep(match) nogen

* keep only multi-truck carriers 
* and cut the fleet-size distribution at its 99th percentile
keep if tot_pwr > 1 
summ tot_pwr, detail
keep if tot_pwr <= `r(p99)'
	
* flag trucks in 2017
gen truck2017 = insp_year == 2017

* find out the carriers for trucks inspected in 2017, drop the others
preserve 

keep if truck2017 == 1 
keep dot_number
bysort dot_number: keep if _n == 1

tempfile dot_number
save `dot_number'

restore 

fmerge m:1 dot_number using `dot_number', keep(match) nogen


* find and drop the same truck that were also inspected in other years 
preserve 

keep if truck2017 == 1 
keep new_VIN
bysort new_VIN: keep if _n == 1

tempfile truck_insp2017
save `truck_insp2017'

restore 

fmerge m:1 new_VIN using `truck_insp2017'

* rows from other years of trucks that do have a 2017 inspection
drop if _merge == 3 & truck2017 ~=1 

* control group: trucks not inspected in 2017, but belong to the same carrier as those trucks inspected in 2017 
gen treat_gp = cond(truck2017 == 1, 1, cond(_merge == 1 & insp_year ~= 2017, 0, .))
drop _merge 

tab treat_gp 

compress 
save "$Truck/processed_data/InspectionData/Insp_control_gp1.dta", replace


*** expand to full panel
use "$Truck/processed_data/InspectionData/Insp_control_gp1.dta",clear

/* remove inspections carried out during enforcement events and on holidays */
if 1 {

	* the calendar fields of the inspection date become the insp_* variables
	drop insp_year
	gen insp_day = day(date)
	rename (year month dayofweek) (insp_year insp_month insp_dow)

	* 1. roadcheck (72 hours): a Tuesday on June 2-8 plus the following two days
	gen roadcheck_day = 1 if insp_month == 6 & inrange(insp_day,2,8) & insp_dow == 2

	gen date_roadcheck_day = date if roadcheck_day == 1
	bysort insp_year: egen date_roadcheck = max(date_roadcheck_day)

	replace roadcheck_day = 1 if inlist(date, date_roadcheck + 1, date_roadcheck + 2)

	* 2. brake week: seven days starting on the Sunday that falls on Sept 6-12
	gen brakeweek_day = 1 if insp_month == 9 & inrange(insp_day,6,12) & insp_dow == 0

	gen date_brakeweek_day = date if brakeweek_day == 1
	bysort insp_year: gegen date_brakeweek = max(date_brakeweek_day)
	// the 2017 start date is set by hand
	replace date_brakeweek = mdy(9,7,2017) if insp_year == 2017

	replace brakeweek_day = 1 if inrange(date, date_brakeweek, date_brakeweek+6)

	* 3. brake check day: a Wednesday on May 1-7
	gen brakecheck_day = 1 if insp_month == 5 & insp_day <= 7 & insp_dow == 3

	* 4. holidays (dow: 0 = Sunday, 1 = Monday, ..., 4 = Thursday, 5 = Friday)
	gen holiday = ///
		(insp_month == 1 & insp_day == 1) | /// New Year's Day
		(insp_month == 1 & insp_dow == 1 & insp_day >= 15 & insp_day <= 21) | /// MLK Day (3rd Monday of Jan)
		(insp_month == 2 & insp_dow == 1 & insp_day >= 15 & insp_day <= 21) | /// Washington's Birthday (3rd Monday of Feb)
		(insp_month == 5 & insp_dow == 1 & insp_day >= 25 & insp_day <= 31) | /// Memorial Day (last Monday of May)
		(insp_month == 7 & insp_day == 4) | /// Independence Day
		(insp_month == 9 & insp_dow == 1 & insp_day >= 1 & insp_day <= 7) | /// Labor Day (1st Monday of Sep)
		(insp_month == 10 & insp_dow == 1 & insp_day >= 8 & insp_day <= 14) | /// Columbus Day (2nd Monday of Oct)
		(insp_month == 11 & insp_day == 11) | /// Veterans Day
		(insp_month == 11 & insp_dow == 4 & insp_day >= 22 & insp_day <= 28) | /// Thanksgiving (4th Thursday of Nov)
		(insp_month == 11 & insp_dow == 5 & insp_day >= 23 & insp_day <= 29) | /// Friday after Thanksgiving
		(insp_month == 12 & insp_day == 24) | /// Christmas Eve
		(insp_month == 12 & insp_day == 25) // Christmas Day

	* drop every inspection that falls on one of the flagged days
	drop if roadcheck_day == 1 | brakeweek_day == 1 | brakecheck_day == 1 | holiday == 1

	drop roadcheck_day brakeweek_day brakecheck_day holiday
}

* replace control_gp inspection time to a random inspection day in 2017 when the truck in the same carrier is inspected.
gen insp_date1 = date if treat_gp == 1

set seed 1155665

* Fix a reproducible row order before numbering the 2017 inspections and before
* drawing random numbers. A plain sort followed by "bys dot_number treat_gp:"
* re-sorts the data with arbitrary tie order, so both the sequence numbers and
* the mapping of random draws to rows could differ between runs even with a
* fixed seed. row_order pins the order so it can be restored after egen.
sort dot_number treat_gp inspection_id, stable
gen long row_order = _n

* within carrier: running index and total count of treated (2017) inspections
by dot_number treat_gp: gen insp2017_seq = _n if treat_gp ==1

by dot_number treat_gp: gen tot_insp2017 = _N if treat_gp ==1
bys dot_number: egen tot_insp = min(tot_insp2017)

* carriers without any treated inspection cannot donate a date
drop if tot_insp == .

* each control row draws one of its carrier's 2017 inspections at random
sort row_order
gen rand_insp_seq = runiformint(1,tot_insp) if treat_gp ==0
drop row_order

* merge in control group inspection time
preserve

keep insp_date1 insp2017_seq dot_number
drop if insp2017_seq == .

duplicates drop

* rename so that the treated sequence number matches the controls' random draw
rename (insp_date1 insp2017_seq) (insp_date0 rand_insp_seq)

tempfile insp2017
save `insp2017'

restore

fmerge m:1 dot_number rand_insp_seq using `insp2017', keepusing(insp_date0) keep(master match) nogen

* treated rows keep their own date; control rows take the drawn 2017 date
gen insp_date = insp_date1
replace insp_date = insp_date0 if insp_date1 == .

* generate inspection panel
keep dot_number new_VIN treat_gp insp_date insp_unit_license insp_unit_license_state insp_unit_vehicle_id_number insp_start_time

* some VIN and license plates do not align, insp_start_time could vary, drop them (0.04mil/2mil = 2%)
duplicates drop dot_number new_VIN treat_gp insp_date , force

* 28 rows per event: event_time runs from -14 to 13
expand 28

bys dot_number new_VIN treat_gp insp_date: gen event_time = _n -15

gen date = insp_date + event_time

gen insp_year = year(insp_date)
gen insp_month = month(insp_date)
gen insp_day = day(insp_date)

compress
save "$Truck/processed_data/EventData/insp_panel/insp_event_control_gp1.dta", replace


*** match crash history
* part 1: rows with a VIN; match on VIN first, then fill gaps using the license plate
use "$Truck/processed_data/EventData/insp_panel/insp_event_control_gp1.dta", clear

keep if insp_unit_vehicle_id_number ~= ""

local varlist "report_date report_time crash_time crash fatalities injuries vehicles_in_accident"

merge m:1 date insp_unit_vehicle_id_number using "$Truck/processed_data/CrashData/Crash_allVIN", nogen keep(master match) keepusing(`varlist')

* park the VIN-matched values under a VIN suffix so the license merge can reuse the names
foreach var of local varlist {
	rename `var' `var'VIN
}

merge m:1 date insp_unit_license insp_unit_license_state using "$Truck/processed_data/CrashData/Crash_alllic", nogen keep(master match) keepusing(`varlist')

* license-matched values are used only where the VIN match returned nothing
foreach var of local varlist {
	replace `var'VIN = `var' if `var'VIN == .
}

drop `varlist'
foreach var of local varlist {
	rename `var'VIN `var'
}

compress
save "$Truck/processed_data/EventData/Insp_Crash_Event_VIN_controlgp1.dta", replace

* part 2: rows without a VIN; match on license plate only
use "$Truck/processed_data/EventData/insp_panel/insp_event_control_gp1.dta", clear

keep if insp_unit_vehicle_id_number == ""

local varlist "report_date report_time crash_time crash fatalities injuries vehicles_in_accident"

merge m:1 date insp_unit_license insp_unit_license_state using "$Truck/processed_data/CrashData/Crash_alllic", nogen keep(master match) keepusing(`varlist')

save "$Truck/processed_data/EventData/Insp_Crash_Event_lic_controlgp1.dta", replace

* append two datasets
use "$Truck/processed_data/EventData/Insp_Crash_Event_lic_controlgp1.dta",clear
append using "$Truck/processed_data/EventData/Insp_Crash_Event_VIN_controlgp1.dta"

* days without a matched crash record count as zero
local crash_var "crash fatalities injuries vehicles_in_accident"
foreach i of local crash_var {
	replace `i' = 0 if `i' == .
}

gen post_insp = (event_time >= 0 )

format date %td

gen year = year(date)
gen month = month(date)
gen day = day(date)
gen dayofweek = dow(date)

* insp_start_time is split as hour*100 + minute
gen insp_start_hr = floor(insp_start_time/100)
gen insp_start_min = insp_start_time - insp_start_hr * 100

* mdyhms() returns milliseconds since 1960; such values exceed float precision
* (a float would round them by up to about two minutes), so store as double
gen double insp_time = mdyhms(insp_month,insp_day,insp_year,insp_start_hr,insp_start_min,0)

drop insp_start_hr insp_start_min

compress
save "$Truck/processed_data/EventData/division_100pct/Insp_Crash_control_gp1.dta", replace



* load data
use "$Truck/processed_data/EventData/division_100pct/Insp_Crash_control_gp1.dta", clear

* rebuild the truck identifier after blanking placeholder VINs and license plates
if 1 {
	drop new_VIN

	* VIN entries that are placeholders rather than identifiers (exact matches)
	foreach bad in CARRIER DRIVER UNK NONE UNKNOWN NA N/A 00000000000000000 ///
			PENSKE FLEET INTERCHANGEABLE unknown 0 00 000 0000 00000 000000 1 ///
			XTRALEASE "XTRA LEASE" UNREADABLE UK SHIPPER SAME NOTFOUND MAPPER {
		replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "`bad'"
	}
	* these two placeholders are kept as literal lines because of their special characters
	replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "//"
	replace insp_unit_vehicle_id_number = "" if insp_unit_vehicle_id_number == "``"

	* VIN entries containing these fragments anywhere
	foreach frag in RYDER 9999999 {
		replace insp_unit_vehicle_id_number = "" if strpos(insp_unit_vehicle_id_number,"`frag'")>0
	}

	replace insp_unit_license_state = "" if insp_unit_license_state == "UK"

	* license entries that are placeholders (exact matches)
	foreach bad in UNKNOWN UK UNK 9 NONE TEMP NA N/A NEW PENDING VIN APPLIED INTRANSIT L9999 PERMIT {
		replace insp_unit_license = "" if insp_unit_license == "`bad'"
	}

	* license entries containing these fragments anywhere
	foreach frag in NO T.O.P TOP ONLY TEMP {
		replace insp_unit_license = "" if strpos(insp_unit_license,"`frag'")>0
	}

	* positive ids come from the VIN; rows without a VIN get a negative id from plate + state
	egen new_VIN = group(insp_unit_vehicle_id_number)
	egen temp1 = group(insp_unit_license insp_unit_license_state)
	replace new_VIN = -temp1 if new_VIN == .
	drop temp1
}

* select sample with no change in treatment status
bys new_VIN: gegen avg_treat = mean(treat_gp)

keep if inlist(avg_treat, 0, 1)
	// drop 0.05%
drop avg_treat

* indicators for other inspections of the same truck:
* insp_p = 1 when an earlier one exists, insp_m = 1 when a later one exists
bysort new_VIN: gegen last_insp = max(mdy(insp_month, insp_day, insp_year))
bysort new_VIN: gegen first_insp = min(mdy(insp_month, insp_day, insp_year))

gen insp_p = mdy(insp_month, insp_day, insp_year) > first_insp
gen insp_m = mdy(insp_month, insp_day, insp_year) < last_insp

gegen new_group_id = group(dot_number new_VIN treat_gp insp_date)

* flag events with a crash in the 18 hours before the inspection
foreach t of numlist 18 {
	// window length in milliseconds
	local hr_`t' = msofhours(`t')

	gen temp_crash_`t'hr = 1 if crash_time < insp_time & crash_time >= insp_time - `hr_`t''
	bysort new_group_id: gegen crash_event_`t'hr = max(temp_crash_`t'hr)
}

drop temp_crash*

*** event study
* generate inspection lead and lags
tsset new_group_id event_time

gen inspection = (event_time == 0)

* F{i}insp = 1 on the row i days before the inspection, L{i}insp = 1 i days after
quietly forvalues i=1/14 {
gen byte F`i'insp=F`i'.inspection
replace F`i'insp = 0 if F`i'insp == .

gen byte L`i'insp=L`i'.inspection
replace L`i'insp = 0 if L`i'insp == .
}

* 2-day bins: insp_m* cover days on/after the inspection, insp_p* days before it
egen insp_m0_1=rowmax(inspection L1insp)
replace insp_m0_1=0 if insp_m0_1==.

quietly forvalues i=2(2)12 {
local k=`i'+1
egen insp_m`i'_`k'=rowmax(L`i'insp L`k'insp)
replace insp_m`i'_`k'=0 if insp_m`i'_`k'==.
}

quietly forvalues i=1(2)13 {
local k=`i'+1
egen insp_p`i'_`k'=rowmax(F`i'insp F`k'insp)
replace insp_p`i'_`k'=0 if insp_p`i'_`k'==.
}

compress

*** dynamic DID
* regressor list: every 2-day bin except insp_p1_2, which is the omitted reference bin
local insp_F
forvalues t = 14(-2)4{
local s = `t'-1
local insp_F `insp_F' insp_p`s'_`t'
}
forvalues t = 1(2)13 {
local s = `t'-1
local insp_F `insp_F' insp_m`s'_`t'
}

* Group-specific event studies. These must run BEFORE the bins are zeroed for
* the control group below; otherwise every regressor is constant zero in the
* treat_gp == 0 sample and no control-group coefficient can be estimated.
reghdfe crash `insp_F' insp_p insp_m if crash_event_18hr ~=1  & treat_gp == 1, absorb(dot_number year month dayofweek ) vce(cluster dot_number)
reghdfe crash `insp_F' insp_p insp_m if crash_event_18hr ~=1  & treat_gp == 0, absorb(dot_number year month dayofweek ) vce(cluster dot_number)

* Pooled specification: switch the bins off for the control group so that the
* bin coefficients are identified from treated trucks only
foreach var of varlist `insp_F' {
	replace `var' = 0 if treat_gp == 0
}

reghdfe crash `insp_F' insp_p insp_m if inrange(event_time,-14,13) & crash_event_18hr ~= 1, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN) poolsize(5) compact nosample


*** plots
* Figure comparing the treatment-group and control-group event-study estimates,
* each expressed as a percentage of its own dependent-variable mean.
clear all

* 14 rows, one per 2-day bin: event_day = -14, -12, ..., 12
set obs 14

gen event_day = (_n - 8)*2

* NOTE(review): var2-var7 are not created by any command visible here; presumably
* the coefficients and bounds are pasted into the data editor by hand before
* these renames run. Confirm, or load them from a saved results file.
rename (var2 var3 var4 ) (treat treat_lb treat_ub)
rename (var5 var6 var7 ) (cont cont_lb cont_ub)

* hard-coded means used as the denominators for the percent conversion
local treat_dep_mean = 0.0001122
local cont_dep_mean = 0.0001046

foreach var of varlist treat treat_ub treat_lb {
	gen pct_`var' = `var' / `treat_dep_mean' * 100
}
foreach var of varlist cont cont_ub cont_lb {
	gen pct_`var' = `var' / `cont_dep_mean' * 100
}

local color_treat "forest_green"
local color_cont "dkorange"

* shaded bands are drawn for days <= -4 and 0..12 only; plots 5 and 6 (the
* connected lines) are the ones shown in the legend
twoway (rarea pct_treat_ub pct_treat_lb event_day if event_day <= -4, color(`color_treat'%20) lcolor(bg)) ///
	(rarea pct_treat_ub pct_treat_lb event_day if inrange(event_day,0,12), color(`color_treat'%20) lcolor(bg)) ///
	(rarea pct_cont_ub pct_cont_lb event_day if event_day <= -4, color(`color_cont'%20) lcolor(bg)) ///
	(rarea pct_cont_ub pct_cont_lb event_day if inrange(event_day,0,12), color(`color_cont'%20) lcolor(bg)) ///
	(connected pct_treat event_day if inrange(event_day,-14,12), msize(medium) mcolor(`color_treat') lcolor(`color_treat') lpattern(solid) lwidth(medium)) ///
	(connected pct_cont event_day if inrange(event_day,-14,12), msize(medium) mcolor(`color_cont') lcolor(`color_cont') lpattern(dash) lwidth(medium))  ///
	(scatteri 80 0 -40 0 , recast(line) lp(solid) lcolor(gs10%50) lwidth(0.6)), ///
	legend(order(5 "treatment group" 6 "control group")) xlabel(-14(2)12) ylabel(-20(20)80, nogrid) ///
	xtitle("day") ytitle("% change (day-2=0)") graphregion(color(white)) xsize(4.5)
	
graph export "$Truck/r&r/new results/treat_cont_gp1.pdf", replace	



* plot panel b: estimates stored in columns var72-var74 of the results sheet
use "$Truck/r&r/new results/revision_plots.dta",clear

rename (var72 var73 var74) (uninsp lb_uninsp ub_uninsp)

* rescale point estimate and bounds to percent of the mean below
local mean = 0.0001082
foreach series in uninsp lb_uninsp ub_uninsp {
	replace `series' = `series'/`mean' * 100
}

local color_treat "black"

local var uninsp
local xaxis event_day

* shared styling for the shaded bands and for the connected line
local band_opts color(`color_treat'%20) lcolor(bg)
local line_opts msize(medium) mcolor(`color_treat') lcolor(`color_treat') lpattern(solid) lwidth(medium)

* bands are drawn for days <= -4 and 0..12, leaving the gap around the reference day (-2)
twoway ///
	(rarea lb_`var' ub_`var' `xaxis' if `xaxis' <= -4, `band_opts') ///
	(rarea lb_`var' ub_`var' `xaxis' if inrange(`xaxis',0,12), `band_opts') ///
	(connected `var' `xaxis' if inrange(`xaxis',-14,12), `line_opts') ///
	(scatteri 80 0 -20 0 , recast(line) lp(solid) lcolor(gs10%50) lwidth(0.6)), ///
	yline(0, lp(dash) lcol(black%50)) ///
	legend(off) xlabel(-14(2)12, nogrid) ylabel(-20(20)80, nogrid) ///
	xtitle("day") ytitle("% change (day-2=0)") graphregion(color(white)) xsize(4.5)

graph export "$Truck/r&r/new results/uninsp_dynamic.pdf", replace









*-------------------------------------------------------------------------
/* figure A11 & Table A16 */
* robustness to the pre-inspection crash window: 3, 6, 12, 18 and 24 hours
* (the 18-hour flag is read from the file; the others are built here)
use crash crash_time insp_time insp_p insp_m event_time new_VIN new_group_id year month dayofweek post_insp  flag_oos crash_event_18hr using "$Truck/processed_data/EventData/division_100pct/Insp_Crash_100pct_outcome.dta" if inrange(event_time,-14,13) & flag_oos ~=1 ,clear

foreach t of numlist 3 6 12 24 {
	// window length in milliseconds; msofhours() also handles 24, which
	// mdyhms() cannot take as an hour argument
	local hr_`t' = msofhours(`t')

	gen temp_crash_`t'hr = 1 if crash_time >= insp_time - `hr_`t'' & crash_time < insp_time
	bysort new_group_id: gegen crash_event_`t'hr = max(temp_crash_`t'hr)
}

drop temp_crash*


* baseline: no events excluded.
* nosample is deliberately NOT used in this section: it stops reghdfe from
* storing e(sample), and the dependent-variable means below rely on it.
	reghdfe crash post_insp insp_p insp_m, absorb(new_VIN i.year i.month i.dayofweek)	vce(cluster new_VIN) poolsize(5) compact
	summ crash if e(sample)

* exclude events with a crash inside each window
foreach t of numlist 3 6 12 18 24 {
	reghdfe crash post_insp insp_p insp_m if crash_event_`t'hr ~= 1, absorb(new_VIN i.year i.month i.dayofweek)	vce(cluster new_VIN) poolsize(5) compact
	summ crash if e(sample)
}


* plot: estimates by length of the excluded pre-inspection crash window
use "$Truck/outputs/Log/plots/figures_Sep2019/figures", clear

rename (var29 var30 var31 var32 var33 var34) (raw hr_3 hr_6 hr_12 hr_18 hr_24)

* rescale to crashes per 100,000
foreach series of varlist raw hr_3 hr_6 hr_12 hr_18 hr_24 {
	replace `series' = `series' * 10^5
}

* styling shared by all six lines; the 18-hour series is additionally drawn in black
local common lwidth(medium) msize(vsmall)

twoway ///
	(connected raw event_day, `common' lp(longdash_dot)) ///
	(connected hr_3 event_day, `common' lp(dash)) ///
	(connected hr_6 event_day, `common' lp(longdash)) ///
	(connected hr_12 event_day, `common' lp(shortdash)) ///
	(connected hr_18 event_day, lcolor(black) mcolor(black) `common' lp(solid)) ///
	(connected hr_24 event_day, `common' lp(shortdash_dot)) if inrange(event_day,-14,12), ///
	xtitle("") ytitle("crash per 100,000 trucks") xlabel(-14(2)12) ylabel(-2(1)9, nogrid) ///
	graphregion(color(white)) xsize(4.5) ///
	legend(order(1 "all" 2 "drop crash -3hr" 3 "drop crash -6hr" 4 "drop crash -12hr" 5 "drop crash -18hr" 6 "drop crash -24hr"))

graph export "division_100pct/crash_Xhr.pdf", replace



*-----------------------------------------------------------------------------
* figure A12

* event-study regressions by crash category (event_id codes 1-21 and 98)

* regressor list: every 2-day bin except insp_p1_2, which is the omitted reference bin
local insp_F
forvalues t = 14(-2)4{
local s = `t'-1
local insp_F `insp_F' insp_p`s'_`t'
}
forvalues t = 1(2)13 {
local s = `t'-1
local insp_F `insp_F' insp_m`s'_`t'
}

* output table: one row per 2-day bin (event_time = -14, -12, ..., 12).
* It is built and saved BEFORE the estimation data are loaded; building it
* afterwards would clear the estimation data from memory.
clear
set obs 14
foreach i of numlist 1/21 98 {
gen coeff_c`i' = .
gen dep_mean_c`i' = .
gen post_insp_c`i' = .
}
gen event_time = (_n -8)*2
save "$Truck/outputs/Log/division_100pct/crash_category2.dta", replace

* estimation data
local insp_event year month date dayofweek new_group_id insp_p insp_m event_time post_insp new_VIN inspection
local crash_event crash crash_event* event_id1 event_id2 event_id3 event_id4 vehicles_in_accident

use `insp_F' `insp_event' `crash_event' using "$Truck/processed_data/EventData/division_100pct/Insp_Crash_100pct.dta", clear nolabel


qui foreach c of numlist 1/21 98 {

	noi di " *** Processing for crash category `c' ****"

	* crash involving category c in any of the four event_id fields
	gen crash_c`c' = 1 if event_id1 == `c' | event_id2 == `c' | event_id3 == `c' | event_id4 == `c'
	replace crash_c`c' = 0 if crash_c`c' == .

	* dynamic specification
	_regress crash_c`c' `insp_F' insp_p insp_m i.year i.month i.dayofweek if inrange(event_time,-14,13) & crash_event_18hr ~= 1, absorb(new_VIN)

	* store results in locals named c{c}_{bin}
	quietly forvalues i=0(2)12 {
	local k=`i'+1
	local c`c'_insp_m`i'_`k' = _b[insp_m`i'_`k']
	}

	quietly forvalues i=3(2)13 {
	local k=`i'+1
	local c`c'_insp_p`i'_`k' = _b[insp_p`i'_`k']
	}

	* pooled post-inspection effect and dependent-variable mean
	_regress crash_c`c' post_insp insp_p insp_m i.year i.month i.dayofweek if inrange(event_time,-14,13) & crash_event_18hr ~= 1, absorb(new_VIN)

		local post_insp_c`c' = _b[post_insp]

	summ crash_c`c' if e(sample)

		local dep_mean_c`c' = r(mean)

	* the indicator is no longer needed once its results are stored
	drop crash_c`c'
}


* write the stored results into the output table
use "$Truck/outputs/Log/division_100pct/crash_category2.dta", clear

qui foreach c of numlist 1/21 98 {

	* bins on/after the inspection sit at event_time = lower bound of the bin
	quietly forvalues i = 0(2)12 {
	local k=`i'+1
	replace coeff_c`c' = `c`c'_insp_m`i'_`k'' if event_time == `i'
	}
	* bins before the inspection sit at event_time = -(upper bound of the bin)
	quietly forvalues i=3(2)13 {
	local k=`i'+1
	replace coeff_c`c' = `c`c'_insp_p`i'_`k'' if event_time == -`k'
	}

	replace dep_mean_c`c' = `dep_mean_c`c''
	replace post_insp_c`c' = `post_insp_c`c''

}

compress
save "$Truck/outputs/Log/division_100pct/crash_category2.dta", replace

* plot
use "$Truck/outputs/Log/division_100pct/crash_category2.dta", clear

* Optional manual override for category 13 (multi-veh). The table saved by the
* estimation step already holds coeff_c13 and dep_mean_c13 and has no var20,
* so an unconditional rename/gen would stop with an error. The override is
* applied only when a var20 column is actually present.
* NOTE(review): presumably var20 was a hand-entered series; confirm whether the
* published figure used it instead of the loop estimate.
capture confirm variable var20
if !_rc {
	capture drop coeff_c13
	capture drop dep_mean_c13
	rename var20 coeff_c13  // multi-veh
	gen dep_mean_c13 = 0.0000406
}

* percent of the category mean; the reference bin (no coefficient) is set to 0
foreach c of numlist 1/21 98 {
	gen pct_c`c' = coeff_c`c' / dep_mean_c`c' * 100
	replace pct_c`c' = 0 if pct_c`c' == .
}



tw  (line pct_c1 event_time, lp(dash) lw(.4)) ///
	(line pct_c3 event_time, lp(dash_dot) lw(.4)) ///
	(line pct_c5 event_time, lp(shortdash) lw(.4)) ///
	(line pct_c9 event_time, lp(longdash) lw(.4)) ///
	(line pct_c18 event_time, lp(longdash_dot) lw(.4)) ///
	(line pct_c13 event_time, lp(solid) lw(.5)), ///
	legend(order(1 "off road" 2 "rollover" 3 "cargo loss" 4 "equip fail" 5 "collision w/ object" 6 "multi-vehicle")) ///
	xtitle("") ytitle("% change (day-2 = 0)") xlabel(-14(2)12) ylabel(-50(25)100, nogrid) ///
	graphregion(color(white)) xsize(4.5)

graph export "plots/division_100pct/crash_cate2.pdf", replace







*-----------------------------------------------------------------------------
* figure A13

* Sun and Abraham 2020AER
use "$Truck/processed_data/EventData/sample_1pct/Insp_Crash_Event_1pct.dta" if inrange(event_time,-14,13) & insp_facility ~= "F", clear

* prepare the 1pct dataset
* (insp_month is renamed temporarily so that "drop insp_m*" does not remove it)
rename insp_month insp_temp
drop insp_p* insp_m* cum* insp_facility fatalities injuries code_model_year lifetime timeBinsp model_year age crash_post_insp no_inj_crash noinjcrash_post_insp last_insp first_insp
rename insp_temp insp_month

gen insp_start_hr = floor(insp_start_time/100)
gen insp_start_min = insp_start_time - insp_start_hr * 100

* mdyhms() values are milliseconds since 1960 and must be stored as double;
* a float would round them by up to about two minutes
gen double insp_time = mdyhms(insp_month,insp_day,insp_year,insp_start_hr,insp_start_min,0)

gen crash_hr = floor(report_time/100)
gen crash_min = report_time - crash_hr * 100

gen double crash_time = mdyhms(crash_month,crash_day,crash_year,crash_hr,crash_min,0)

drop crash_hr crash_min crash_year crash_month crash_day insp_start_min insp_start_hr


* gen new id: positive ids from the VIN, non-positive ids from plate + state when the VIN is empty
gegen new_license = group(insp_unit_license_state insp_unit_license)
replace new_license = 0 if new_license == .
replace new_license = - new_license

gegen new_VIN = group(insp_unit_vehicle_id_number)
replace new_VIN = new_license if insp_unit_vehicle_id_number == ""

gegen new_group_id = group(new_VIN inspection_id insp_year)
replace new_group_id = 0 if new_group_id == .
replace new_group_id = . if event_time == .


* outside event window insp indicator
bysort new_VIN: gegen last_insp = max(mdy(insp_month, insp_day, insp_year))
bysort new_VIN: gegen first_insp = min(mdy(insp_month, insp_day, insp_year))

gen insp_p = 0
gen insp_m = 0
replace insp_p = 1 if mdy(insp_month, insp_day, insp_year) > first_insp
replace insp_m = 1 if mdy(insp_month, insp_day, insp_year) < last_insp

* drop crash within 18 hours
foreach t of numlist 18 {
	local hr_`t' = mdyhms(1,1,2000,`t',0,0)-mdyhms(1,1,2000,0,0,0)

	gen temp_crash_`t'hr = 1 if crash_time >= insp_time - `hr_`t'' & crash_time < insp_time
	bysort new_group_id: gegen crash_event_`t'hr = max(temp_crash_`t'hr)
}


* generate inspection lead and lags

tsset new_group_id event_time

quietly forvalues i=1/14 {
gen byte F`i'insp=F`i'.inspection
replace F`i'insp = 0 if F`i'insp == .

gen byte L`i'insp=L`i'.inspection
replace L`i'insp = 0 if L`i'insp == .
}

* 2-day bins
* combine into 2-day bins to increase power [(F14,F13)...(F2,F1),(0,L1),(L2,L3)...(L12,L13)
egen insp_m0_1=rowmax(inspection L1insp)
replace insp_m0_1=0 if insp_m0_1==.

quietly forvalues i=2(2)12 {
local k=`i'+1
egen insp_m`i'_`k'=rowmax(L`i'insp L`k'insp)
replace insp_m`i'_`k'=0 if insp_m`i'_`k'==.
}

quietly forvalues i=1(2)13 {
local k=`i'+1
egen insp_p`i'_`k'=rowmax(F`i'insp F`k'insp)
replace insp_p`i'_`k'=0 if insp_p`i'_`k'==.
}

* weights on pretrend

keep if inrange(event_time, -3,3) & crash_event_18hr ~= 1

gen cohort = insp_year

* relative-time indicators; event_time == -1 is the omitted period
gen g_3 = event_time <= -3
gen g_2 = event_time == -2
gen g0 = event_time == 0
gen g1 = event_time == 1
gen g2 = event_time == 2
gen g3 = event_time >= 3


* saveweights() writes the estimated weights to <name>.xlsx. Without it the
* keep/reshape below would act on the observation-level indicators rather than
* on the weights, and the reshape would fail because (event_time, cohort) is
* not unique in the estimation data.
eventstudyweights g_3 g_2 g0 g1 g2 g3, cohort(cohort) rel_time(event_time) absorb(i.new_VIN) covariates(year month dayofweek) saveweights("$Truck/outputs/Log/EventPlots/sample_1pct/eventstudyweights")
mat list e(weights)

* export the weights to a spreadsheet, then read them back for plotting
import excel "$Truck/outputs/Log/EventPlots/sample_1pct/eventstudyweights.xlsx", clear firstrow
keep g_2 cohort event_time
reshape wide g_2, i(event_time) j(cohort)

save "$Truck/outputs/Log/EventPlots/sample_1pct/eventstudyweights.dta", replace

// the point estimates on g_2 are exactly the number of inspections in each year

graph twoway line g_2* event_time, xtitle("Relative Wave") ytitle("weight") ///
	legend(off) ylabel(-.1(.05).1, nogrid) xlabel(-3(1)3) ///
	graphregion(fcolor(white)) scheme(sj)
graph export "$Truck/outputs/Log/EventPlots/sample_1pct/eventstudyweights_2.pdf", replace











*-----------------------------------------------------------------------------
* figure A14

use "$Truck/processed_data/TxDOT_CRIS/Insp_Crash_Event_tx.dta", clear

* attach the out-of-service count and drop inspections with any OOS finding
fmerge m:1 inspection_id insp_year using "$Truck/processed_data/InspectionData/Insp_outcome.dta", ///
	keepusing(oos_total) nogen keep(match master)

drop if oos_total ~= . & oos_total > 0

* outcome: crash_tx on rows with fed_record == 1, zero elsewhere
gen crash_fed = cond(fed_record == 1, crash_tx, 0)
replace crash_fed = 0 if crash_fed == .

* regressor list: every 2-day bin except insp_p1_2, which is the omitted reference bin
local insp_F
forvalues s = 13(-2)3 {
	local t = `s' + 1
	local insp_F `insp_F' insp_p`s'_`t'
}
forvalues s = 0(2)12 {
	local t = `s' + 1
	local insp_F `insp_F' insp_m`s'_`t'
}

* sample restriction and estimation options shared by both specifications
local sample_if if inrange(event_time,-14,13) & crash_event_18hr ~= 1
local est_opts absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN)

reghdfe crash_fed `insp_F' insp_p insp_m `sample_if', `est_opts'

reghdfe crash_fed post_insp insp_p insp_m `sample_if', `est_opts'

summ crash_fed if e(sample)



* plot
use figures, clear

// all crashes
rename (var102 var103 var104) (crash_18hr lb_18hr ub_18hr)

* percent of the hard-coded mean below
local dep_mean 0.0000177
gen pct_18hr = crash_18hr / `dep_mean' * 100
gen pct_18hr_ub = ub_18hr / `dep_mean' * 100
gen pct_18hr_lb = lb_18hr / `dep_mean' * 100

* average effect before (days -14..-2) and after (days 0..12) the inspection
summ pct_18hr if inrange(event_day,-14,-2)
gen avg_18hr_F = r(mean)

summ pct_18hr if inrange(event_day,0,12)
gen avg_18hr_L = r(mean)

* rounded averages, used as the end points of the vertical connector at day 0
summ avg_18hr_F
local avg_18hr_F = round(r(mean),1)
summ avg_18hr_L
local avg_18hr_L = round(r(mean),1)


local color "purple"
local band_opts fcolor(`color'*0.2) lcolor(`color'*0.01)
local avg_opts lcolor(gs4) lwidth(thin) lpattern(dash)

twoway ///
	(rarea pct_18hr_ub pct_18hr_lb event_day if event_day <= -4 , `band_opts') ///
	(rarea pct_18hr_ub pct_18hr_lb event_day if inrange(event_day,0,12), `band_opts') ///
	(connected pct_18hr event_day if inrange(event_day,-14,12), msize(medium) mcolor(`color') lcolor(`color') lpattern(solid) lwidth(medium)) ///
	(line avg_18hr_F event_day if inrange(event_day,-14,0), `avg_opts') ///
	(line avg_18hr_L event_day if inrange(event_day,0,12), `avg_opts') ///
	(scatteri `avg_18hr_F' 0 `avg_18hr_L' 0 , recast(line) lp(solid) lcolor(gs4) lwidth(0.6)), ///
	legend(off) xlabel(-14(2)12) ylabel(-40(20)100, nogrid) ///
	text(30 3.5 "change=39.7%", place(c) color(`color') ) ///
	xtitle("") ytitle("effect size in %") ///
	graphregion(color(white)) xsize(6.5)


graph export "$Truck/writing/paper/figures/txdot_crash.pdf", replace







*---------------------------------------------------------------------------------
* figure A15 

* falsification
* Reload the Texas event data: the preceding plotting step replaces the data in
* memory with the figures sheet, which does not hold crash_tx, dr_factor1,
* veh_factor, fed_record or Icharge.
use "$Truck/processed_data/TxDOT_CRIS/Insp_Crash_Event_tx.dta", clear

* NOTE(review): the out-of-service exclusion used for figure A14 is re-applied
* here on the assumption that both figures share one sample; confirm.
fmerge m:1 inspection_id insp_year using "$Truck/processed_data/InspectionData/Insp_outcome.dta", ///
	keepusing(oos_total) nogen keep(match master)

drop if oos_total > 0 & oos_total ~= .

* outcome: crash_tx on rows with dr_factor1 == 0, veh_factor == 0 and
* fed_record == 1; zero elsewhere and whenever Icharge is positive
gen crash_other1 = crash_tx if dr_factor1 == 0 & veh_factor == 0 & fed_record == 1
replace crash_other1 = 0 if crash_other1 == .

replace crash_other1 = 0 if Icharge > 0 & Icharge ~= .

* regressor list: every 2-day bin except insp_p1_2, which is the omitted reference bin
local insp_F
forvalues t = 14(-2)4{
local s = `t'-1
local insp_F `insp_F' insp_p`s'_`t'
}
forvalues t = 1(2)13 {
local s = `t'-1
local insp_F `insp_F' insp_m`s'_`t'
}

reghdfe crash_other1 `insp_F' insp_p insp_m , absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN)
reghdfe crash_other1 post_insp insp_p insp_m, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN)

summ crash_other1 if e(sample)


// other factors (not driver&veh related)
use figures, clear

rename var114 other_factor

tw bar other_factor event_day if inrange(event_day,-14,12), fintensity(inten30) fcolor(blue) lcolor(blue%0) lpattern(solid) lwidth(thin) xlabel(-14(2)12) ylabel(0(10)60) ytitle("Number of crashes, not driver/truck-related") xtitle("day") xsize(4.5) graphregion(color(white))

graph export "$Truck/writing/paper/figures/txdot_other.pdf", replace




*---------------------------------------------------------------------------------------
* figure A16 and table A10
local insp_list insp_countyfips event_time new_VIN new_group_id year month dayofweek post_insp insp_m insp_p inspection inspection_id dot_number insp_year insp_month insp_day date insp_time flag_oos
local crash_list crash crash_event_18hr crash_time event_id1 event_id2 event_id3 event_id4 vehicles_in_accident

use `insp_list' `crash_list' using "$Truck/processed_data/EventData/division_100pct/Insp_Crash_100pct_outcome.dta" if inrange(event_time,-14,13) & crash_event_18hr ~=1 & flag_oos ~=1, clear

* find out insp blitz days: a Tuesday on June 2-8 plus the following two days
gen insp_date = mdy(insp_month,insp_day,insp_year)
gen insp_dow = dow(insp_date)

gen roadcheck_day = 1 if insp_month == 6 & inrange(insp_day,2,8) & insp_dow == 2

gen date_roadcheck_day = insp_date if roadcheck_day == 1
bysort insp_year: egen date_roadcheck = max(date_roadcheck_day)

replace roadcheck_day = 1 if inrange(insp_date,date_roadcheck + 1,date_roadcheck + 2)

*** look at blitz days vs other days
* 1 = 28 to 15 days before, 2 = 14 to 1 days before, 3 = blitz days,
* 4 = 3 to 16 days after the first blitz day; later categories take precedence
gen treat_gp5 = ///
	cond(inrange(insp_date,date_roadcheck+3,date_roadcheck+16), 4, ///
	cond(roadcheck_day == 1, 3, ///
	cond(inrange(insp_date,date_roadcheck-14,date_roadcheck-1), 2, ///
	cond(inrange(insp_date,date_roadcheck-28,date_roadcheck-15), 1, .))))

* the two weeks just before the blitz against the other three periods
gen treat_week_p2 = 1 if treat_gp5 == 2
replace treat_week_p2 = 0 if inlist(treat_gp5, 1, 3, 4)

* estimation options shared by all regressions in this section
local est_opts absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN)

reghdfe crash post_insp##i.treat_gp5 insp_p insp_m, `est_opts'
summ crash if e(sample)

reghdfe crash post_insp##i.treat_week_p2 insp_p insp_m, `est_opts'



tab inspection treat_gp5
count if inspection == 1 & inrange(insp_date,date_roadcheck+17,date_roadcheck+30)

* separate post-inspection effect for each period
forvalues g = 1/4 {
	reghdfe crash post_insp insp_p insp_m if treat_gp5 == `g', `est_opts'
	summ crash if e(sample)
}


* plot: post-inspection effect by period around the blitz
use "$Truck/r&r/new results/revision_plots.dta",clear

* the first four rows of the sheet hold the four periods, in order
gen group = _n in 1/4

lab define label_group 1 "4 to 2 weeks before" 2 "2 weeks before" 3 "blitz" 4 "2 weeks after", replace
lab values group label_group

rename (var15 var16 var17) (post lb_post ub_post)

local var post
local xaxis group

* bar styling shared by both bar plots; group 2 is highlighted in red
local bar_opts barw(0.5) lw(0.2) lcol(white)

tw 	rcap lb_`var' ub_`var' `xaxis',  msize(8) col(black) || ///
	bar `var' `xaxis' if group ~=2, `bar_opts' col(blue%40) || ///
	bar `var' `xaxis' if group ==2, `bar_opts' col(red%40) ///
	ylab(,nogrid) ///
	xlab(1(1)4, valuelabel nogrid) ///
	ytitle("% change in crash probability") ///
	xtitle("") ///
	legend(off) ///
	graphregion(col(white)) xsize(7)
gr export "$Truck/r&r/new results/blitz.pdf", replace







*-----------------------------------------------------------------------------------
* figure a17 and table A11 
* Post-inspection crash effect by inspection level, with and without a control for
* inspection duration. Inspections made on enforcement-blitz days and holidays are
* removed first.

local insp_list event_time new_VIN new_group_id year month dayofweek post_insp flag_oos insp_m insp_p inspection inspection_id insp_unit_license insp_unit_license_state insp_unit_vehicle_id_number insp_year insp_month insp_day date insp_time insp_end_time
local crash_list crash crash_event_18hr crash_time 

use `insp_list' `crash_list' using "$Truck/processed_data/EventData/division_100pct/Insp_Crash_100pct_outcome.dta" if inrange(event_time,-14,13) & crash_event_18hr ~=1 & flag_oos ~=1, clear

* merge in insp level
fmerge m:1 inspection_id insp_year using "$Truck/processed_data/InspectionData/reg_date.dta", keepusing(insp_level_id) nogen keep(master match)

* merge in additional checks
fmerge m:1 inspection_id insp_year using "$Truck/processed_data/InspectionData/Insp_outcome.dta", keepusing(alcohol_control_sub_n drug_intrdctn_search_n size_weight_enf_n traffic_enf_n) nogen keep(match master)

* inspection end time: insp_end_time is split into hour (value/100) and minute (remainder)
gen end_h = floor(insp_end_time/100)
gen end_m = insp_end_time - end_h * 100

* mdyhms() returns milliseconds since 01jan1960; such values exceed float precision
* (float rounds them by up to about a minute), so they must be stored as double
gen double end_time = mdyhms(insp_month,insp_day,insp_year,end_h,end_m,0)

* duration in milliseconds; negative durations are treated as invalid
* NOTE(review): assumes insp_time is stored as a double datetime in the source file - confirm
gen double duration = end_time - insp_time
replace duration = . if duration < 0

* drop special days 
if 1 {
gen insp_date = mdy(insp_month,insp_day,insp_year)
* day of week of the INSPECTION date (0 = Sunday). All rules below combine it with
* insp_month / insp_day, so it must not be taken from the event-window day `date'.
gen insp_dow = dow(insp_date)

* 1. roadcheck 72 hours: starts on the Tuesday falling on June 2-8, plus the next two days
gen roadcheck_day = 1 if insp_month == 6 & insp_day <= 8 & insp_day > 1 & insp_dow == 2

gen date_roadcheck_day = insp_date if roadcheck_day == 1
bysort insp_year: egen date_roadcheck = max(date_roadcheck_day)

replace roadcheck_day = 1 if inrange(insp_date,date_roadcheck + 1,date_roadcheck + 2)

* 2. brake week: seven days starting on the Sunday falling on Sept 6-12 (hard-coded to Sept 7 for 2017)
gen brakeweek_day = 1 if insp_month == 9 & insp_day > 5 & insp_day <= 12 & insp_dow == 0

gen date_brakeweek_day = insp_date if brakeweek_day == 1
bysort insp_year: gegen date_brakeweek = max(date_brakeweek_day)
replace date_brakeweek = mdy(9,7,2017) if insp_year == 2017

replace brakeweek_day = 1 if inrange(insp_date, date_brakeweek, date_brakeweek+6)

* 3.  brake day: first Wednesday of May, except 2018 where it is April 25
gen brakecheck_day = 1 if insp_month == 5 & insp_day <= 7 & insp_dow == 3
replace brakecheck_day = . if insp_year == 2018
replace brakecheck_day = 1 if insp_date == mdy(4,25,2018)

* 4. holidays 
gen holiday = 0
replace holiday = 1 if insp_month == 1 & insp_day == 1 // New Year
replace holiday = 1 if insp_month == 1 & insp_dow == 1 & insp_day >= 15 & insp_day <= 21  // MLK (3rd Monday of Jan)
replace holiday = 1 if insp_month == 2 & insp_dow == 1 & insp_day >= 15 & insp_day <= 21  // Washington's Birthday (3rd Monday of Feb)
replace holiday = 1 if insp_month == 5 & insp_dow == 1 & insp_day >= 25 & insp_day <= 31 // Memorial Day (last Monday of May)
replace holiday = 1 if insp_month == 7 & insp_day==4 // Independence Day
replace holiday = 1 if insp_month == 9 & insp_dow == 1 & insp_day >= 1 & insp_day <= 7 // Labor Day (1st Monday of Sept)
replace holiday = 1 if insp_month == 10 & insp_dow == 1 & insp_day >= 8 & insp_day <= 14 // Columbus Day (2nd Monday of Oct)
replace holiday = 1 if insp_month == 11 & insp_day == 11 // Veterans Day
replace holiday = 1 if insp_month == 11 & insp_dow == 4 & insp_day >= 22 & insp_day <= 28 // Thanksgiving (4th Thursday of Nov)
replace holiday = 1 if insp_month == 11 & insp_dow == 5 & insp_day >= 23 & insp_day <= 29 // Friday after Thanksgiving
replace holiday = 1 if insp_month == 12 & insp_day == 24 // Christmas Eve
replace holiday = 1 if insp_month == 12 & insp_day == 25 // Christmas Day

* drop special event inspections + holiday
drop if roadcheck_day == 1 | brakeweek_day == 1 | brakecheck_day == 1 | holiday == 1

drop roadcheck_day brakeweek_day brakecheck_day holiday date_roadcheck_day date_roadcheck date_brakeweek_day date_brakeweek

}


* keep level 1,2,3 only (levels 4, 5 and 6 are set to missing and so leave the regressions)
gen insp_level = insp_level_id 
replace insp_level = . if insp_level_id == 4 | insp_level_id == 5 | insp_level_id == 6


* check duration of inspection level 
gstats tab duration , by(insp_level) s(n mean sd min p25 p50 p75 max)

* post-inspection effect by inspection level
reghdfe crash post_insp##i.insp_level insp_p insp_m, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN)  


* add durations
reghdfe crash post_insp##i.insp_level post_insp##c.duration insp_p insp_m, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN)  



* plot 
* Bars with capped intervals for inspection levels I-III, annotated with the
* percentage shown next to each bar.
use "$Truck/r&r/new results/revision_plots.dta", clear

* rows 1-3 hold levels I-III, in order
generate level = _n in 1/3

rename var28 intensity
rename var29 lb_intensity
rename var30 ub_intensity

label define label_level 1 "Level I" 2 "Level II" 3 "Level III", replace 
label values level label_level 

twoway (rcap lb_intensity ub_intensity level, msize(8) color(black)) ///
	(bar intensity level, barwidth(0.5) color(blue%40) lwidth(0.2) lcolor(white)), ///
	ylabel(0(20)80, nogrid) ///
	xlabel(1(1)3, valuelabel nogrid) ///
	ytitle("% change in crash probability") ///
	xtitle("") ///
	legend(off) ///
	text(35 1.2 "31.6%", place(c) color(black) ) ///
	text(55 2.2 "50.8%", place(c) color(black) ) ///
	text(60 3.2 "56.7%", place(c) color(black) ) ///
	graphregion(color(white)) xsize(7)

graph export "$Truck/r&r/new results/insp_level.pdf", replace








*--------------------------------------------------------------------------------------
* figure a18 
* Post-inspection crash effect by inspection year, after removing inspections made
* on enforcement-blitz days and holidays.
local insp_list new_VIN new_group_id year month dayofweek post_insp flag_oos insp_m insp_p inspection inspection_id insp_month insp_day insp_year date insp_end_time insp_time event_time insp_countyfips dot_number
local crash_list crash crash_event_18hr 

use `insp_list' `crash_list' using "$Truck/processed_data/EventData/division_100pct/Insp_Crash_100pct_outcome.dta" if inrange(event_time,-14,13) & crash_event_18hr ~=1 & flag_oos ~=1 , clear

* drop special days 
if 1 {
gen insp_date = mdy(insp_month,insp_day,insp_year)
* day of week of the INSPECTION date (0 = Sunday). All rules below combine it with
* insp_month / insp_day, so it must not be taken from the event-window day `date'.
gen insp_dow = dow(insp_date)

* 1. roadcheck 72 hours: starts on the Tuesday falling on June 2-8, plus the next two days
gen roadcheck_day = 1 if insp_month == 6 & insp_day <= 8 & insp_day > 1 & insp_dow == 2

gen date_roadcheck_day = insp_date if roadcheck_day == 1
bysort insp_year: egen date_roadcheck = max(date_roadcheck_day)

replace roadcheck_day = 1 if inrange(insp_date,date_roadcheck + 1,date_roadcheck + 2)

* 2. brake week: seven days starting on the Sunday falling on Sept 6-12 (hard-coded to Sept 7 for 2017)
gen brakeweek_day = 1 if insp_month == 9 & insp_day > 5 & insp_day <= 12 & insp_dow == 0

gen date_brakeweek_day = insp_date if brakeweek_day == 1
bysort insp_year: gegen date_brakeweek = max(date_brakeweek_day)
replace date_brakeweek = mdy(9,7,2017) if insp_year == 2017

replace brakeweek_day = 1 if inrange(insp_date, date_brakeweek, date_brakeweek+6)

* 3.  brake day: first Wednesday of May, except 2018 where it is April 25
gen brakecheck_day = 1 if insp_month == 5 & insp_day <= 7 & insp_dow == 3
replace brakecheck_day = . if insp_year == 2018
replace brakecheck_day = 1 if insp_date == mdy(4,25,2018)

* 4. holidays 
gen holiday = 0
replace holiday = 1 if insp_month == 1 & insp_day == 1 // New Year
replace holiday = 1 if insp_month == 1 & insp_dow == 1 & insp_day >= 15 & insp_day <= 21  // MLK (3rd Monday of Jan)
replace holiday = 1 if insp_month == 2 & insp_dow == 1 & insp_day >= 15 & insp_day <= 21  // Washington's Birthday (3rd Monday of Feb)
replace holiday = 1 if insp_month == 5 & insp_dow == 1 & insp_day >= 25 & insp_day <= 31 // Memorial Day (last Monday of May)
replace holiday = 1 if insp_month == 7 & insp_day==4 // Independence Day
replace holiday = 1 if insp_month == 9 & insp_dow == 1 & insp_day >= 1 & insp_day <= 7 // Labor Day (1st Monday of Sept)
replace holiday = 1 if insp_month == 10 & insp_dow == 1 & insp_day >= 8 & insp_day <= 14 // Columbus Day (2nd Monday of Oct)
replace holiday = 1 if insp_month == 11 & insp_day == 11 // Veterans Day
replace holiday = 1 if insp_month == 11 & insp_dow == 4 & insp_day >= 22 & insp_day <= 28 // Thanksgiving (4th Thursday of Nov)
replace holiday = 1 if insp_month == 11 & insp_dow == 5 & insp_day >= 23 & insp_day <= 29 // Friday after Thanksgiving
replace holiday = 1 if insp_month == 12 & insp_day == 24 // Christmas Eve
replace holiday = 1 if insp_month == 12 & insp_day == 25 // Christmas Day

* drop special event inspections + holiday
drop if roadcheck_day == 1 | brakeweek_day == 1 | brakecheck_day == 1 | holiday == 1

drop roadcheck_day brakeweek_day brakecheck_day holiday date_roadcheck_day date_roadcheck date_brakeweek_day date_brakeweek

}

* by year group: post-inspection effect interacted with inspection year
reghdfe crash post_insp##i.insp_year insp_p insp_m, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN)  

* mean crash rate by inspection year
gstats tab crash, s(mean) by(insp_year)


* plot 
* Yearly effect size with a shaded interval, overlaid with dashed flat segments
* and their percentage labels for five multi-year periods.
use "$Truck/r&r/new results/revision_plots.dta", clear

* row 1 is 1996, row 2 is 1997, ...
generate year = 1995 + _n 

set obs 23

rename var32 yr_gp_mean
rename var33 yr_gp
rename var34 lb_yr_gp
rename var35 ub_yr_gp
rename var37 nbr_insp_pertruck

// replace lb_yr_gp = lb_yr_gp / yr_gp_mean * 100
// replace ub_yr_gp = ub_yr_gp / yr_gp_mean * 100

local shade "gs5"

* dashed reference segments, each given as "level end-year"; a segment starts
* where the previous one ends (the first starts in 1996)
local ref_lines
local from 1996
foreach seg in "49 2001" "50 2006" "56 2011" "38 2016" "31 2018" {
	local level : word 1 of `seg'
	local to    : word 2 of `seg'
	local ref_lines `ref_lines' (function y=`level', range(`from' `to') lp(dash) lcol(black%50))
	local from `to'
}

twoway (rarea lb_yr_gp ub_yr_gp year, color(`shade'%20) lcolor(bg)) ///
	(connected yr_gp year, msize(medium) mcolor(`shade') msymbol(Oh) lcolor(`shade') lpattern(solid) lwidth(medium)) ///
	`ref_lines', ///
	text(53 1998 "49.2%", place(c) color(black) ) ///
	text(54 2004 "50.4%", place(c) color(black) ) ///
	text(60 2009 "56.0%", place(c) color(black) ) ///
	text(35 2013 "38.3%", place(c) color(black) ) ///
	text(28 2017 "31.1%", place(c) color(black) ) ///
	legend(off) xlabel(1996(5)2018, nogrid) ylabel(0(25)100, nogrid)  ///
	xtitle("") ytitle("effect size (%)") graphregion(color(white)) xsize(6)

graph export "$Truck/r&r/new results/yr_gp.pdf", replace	







*-----------------------------------------------------------------------------------------
* figure a19 is drawn using R code, copied here 
/*
   R SCRIPT (not Stata). It is kept inside a Stata block comment so that this
   do-file can be run without Stata stopping on R syntax. To redraw figure a19,
   copy everything between the comment markers into an R session.

   The script maps Texas counties, shades counties using the `year` column of
   the weigh-station file, and overlays rest areas as points.

# Load packages
if (!require("pacman")) install.packages("pacman")
pacman::p_load(
  raster, sf, tidyverse, tidylog, data.table, janitor,
  magrittr, lubridate, foreign, maps, sp, zoo, parallel,
  broom, furrr, haven, statar, dplyr, 
  ncdf4, tigris, mapchina
)

# R does not expand the Stata global $Truck, so the project root is spelled out
# (same path as the `global Truck` definition at the top of this do-file)
setwd("/Users/yuanningliang/Truck/") 


# rest area
restarea <- fread("processed_data/TxDOT/TXrestarea.csv") %>%
  as_tibble() %>%
  clean_names() %>%
  select(-v7) 

# only the first 90 rows are converted to point geometries
restarea.geo <- restarea[1:90,] %>%
  st_as_sf(
    coords = c("longitude", "latitude"),
    crs = 4326, remove = FALSE
  ) 

# weigh station
weigh_st <- fread("processed_data/TxDOT/TXweighstation.csv") %>%
  as_tibble() %>%
  clean_names() 


# Load from the maps package
county_fips <- county.fips
counties_sdf <- st_as_sf(map("county", plot = FALSE, fill = TRUE)) %>% 
  rename(polyname = ID) %>% 
  full_join(county_fips) %>% 
  rename(geoid = fips) %>%
  mutate(county_fips = str_pad(as.character(geoid), 5, pad="0") ) %>%
  mutate(state_fips = substr(county_fips, 1,2))
# keep Texas only (state FIPS 48)
texas_county <- counties_sdf %>%
  filter(state_fips == "48")

# attach weigh-station rows to the county polygons; `fill` is TRUE where `year`
# is present and NA where it is missing
weigh_st <- texas_county %>%
  rename(insp_countyfips = geoid) %>%
  full_join(weigh_st) %>%
  mutate(fill = (year != "NA"))


## plot rest areas onto texas counties map
ggplot() + 
  geom_sf(data= texas_county, color = "#999999", fill = "#FFFFFF", alpha = 0.0) +
  geom_sf(data = weigh_st ,
          aes(fill = fill)) + 
  geom_point(data = restarea.geo,
             aes(x = longitude, y = latitude, colour = "Rest Area"),
             size = 1.5,
             stroke = 0.75) +
  theme_void() +
  guides(
    fill = guide_legend(title = "Weigh Station",
                        override.aes = list(size = 1, alpha = .2)),
    colour = guide_legend(title = "",
                        override.aes = list(size = 2, alpha = 1))
  ) +
  theme(
    legend.position = "right",
    text = element_text(color = "#22211d"),
    plot.background = element_rect(fill = "#FFFFFF", color = NA),
    panel.background = element_rect(fill = "#FFFFFF", color = NA),
    legend.background = element_rect(fill = "#FFFFFF", color = NA),
    legend.title = element_text(size = 12, color = "#4e4d47"),
    legend.text = element_text(size = 12)
  ) +
  scale_fill_manual(values = alpha(c('#e41a1c','#377eb8','#4daf4a','#984ea3','#ff7f00','#ffff33','#a65628','#f781bf'), .2), 
                    na.value="#f2f2f2",
                    labels = c("Y", "N")) 

ggsave("outputs/Log/map_restareas.pdf", 
       plot = last_plot(), # or give ggplot object name as in myPlot,
       width = 20, height = 15, 
       units = "cm", # other options c("in", "cm", "mm"), 
       dpi = 300,
       limitsize=FALSE)

   END OF R SCRIPT
*/



*-------------------------------------------------------------------------------
* figure a20

* weather data by year
* Each yearly raw file is cut down to five measures for US stations, averaged to
* county-day level, reshaped to one row per county-day, and saved as `yr'weather.
forvalues yr = 1989/2018 {
	import delimited "$Weatherfolder/raw_data/`yr'.csv", encoding(ISO-8859-1) clear
	rename (v1 v2 v3 v4) (stationID date description mm)
	keep if inlist(description, "SNOW", "SNWD", "PRCP", "TMIN", "TMAX")
	* station IDs starting with "US" only
	keep if substr(stationID, 1, 2) == "US"
	drop v5 v6 v7 v8
	merge m:1 stationID using stations_fips, keep(match master) nogen
	* -999 is treated as a missing reading
	replace mm = . if mm == -999
	collapse (mean) mm, by(countyfips date description state_abb)
	drop if date == .
	reshape wide mm, i(date countyfips) j(description) string

	generate insp_countyfips = countyfips

	save `yr'weather, replace
}

* stack all years
use 1989weather, clear
forvalues yr = 1990/2018 {
	append using `yr'weather
}

drop if insp_countyfips == ""  // one obv in every day for each county is missing by construction
save snowrain, replace // misnomer: the file holds all five weather measures

* calculate weather deviation from the county-year-month average
use snowrain, clear

* strip the "mm" prefix that reshape put in front of each measure
global weather PRCP SNOW SNWD TMAX TMIN
foreach w of global weather {
	rename mm`w' `w'
}

* date is a yyyymmdd number
generate year = floor(date/10000)
generate month = floor(mod(date, 10000)/100)
foreach w of global weather {
	bysort insp_countyfips year month: egen avg_`w' = mean(`w')
	generate D_`w' = `w' - avg_`w'
	* a missing daily value counts as zero deviation whenever the average exists
	replace D_`w' = 0 if D_`w' == . & avg_`w' != .
}
save snowrain, replace






* Placebo outcome: regress a rain indicator on the 2-day inspection lead/lag bins.
local insp_list "post_insp insp_p insp_m year month dayofweek event_time new_VIN date new_group_id insp_time inspection insp_countyfips" 

* event-study regressors: leads insp_p13_14 ... insp_p3_4, then lags insp_m0_1 ... insp_m12_13
* (the insp_p1_2 bin is left out of the list)
local insp_F
forvalues lo = 13(-2)3 {
	local hi = `lo' + 1
	local insp_F `insp_F' insp_p`lo'_`hi'
}
forvalues lo = 0(2)12 {
	local hi = `lo' + 1
	local insp_F `insp_F' insp_m`lo'_`hi'
}

use `insp_list' `insp_F' using "EventData/division_100pct/Insp_Crash_100pct.dta", clear


rename date date_td
rename insp_countyfips countyfips

* rebuild the day as a yyyymmdd number, the date format used as merge key in the weather file
generate day = day(date_td)
generate date = string(year, "%04.0f") + string(month, "%02.0f") + string(day, "%02.0f")

destring date, replace
format date %12.0g 


merge m:1 countyfips date using "weather/snowrain", keep(master match) keepusing(PRCP SNOW) nogen 
	// merged 90% of master (only conti US)

*drop if date > 20180911 // GHCN Daily weather data avail before 20180911
* tab state if state_ab == "" 

* indicators for any precipitation / snow; left missing when there is no observation
generate prcp_ind = (PRCP > 0) if PRCP != .
generate snow_ind = (SNOW > 0) if SNOW != .


* `insp_F' still holds the lead/lag list built above
reghdfe prcp_ind `insp_F' insp_p insp_m, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN)  

summarize prcp_ind if e(sample)


* plot
* Event-study plot of the rain placebo, rescaled to percent of the dependent-variable mean.
use "$Truck/outputs/Log/plots/figures_Sep2019/figures", clear

rename (var26 var27 var28) (pe_rain lb_rain ub_rain)


* hard-coded mean of the dependent variable used for the rescaling
local dep_mean = 0.4143418

* point estimate and both bounds become percent of that mean
foreach stat in pe lb ub {
	replace `stat'_rain = `stat'_rain / `dep_mean' * 100
}

*
local shade "navy"

* the shaded band is drawn in two pieces, leaving event day -2 unshaded
twoway (rarea lb_rain ub_rain event_day if event_day <= -4 ,fcolor(`shade'%20) lcolor(`shade'%1)) ///
	(rarea lb_rain ub_rain event_day if inrange(event_day,0,12), fcolor(`shade'%20) lcolor(`shade'%1)) ///
	(connected pe_rain event_day if inrange(event_day,-14,12), msize(medium) mcolor(`shade') msymbol(Oh) lcolor(`shade') lpattern(solid) lwidth(medium)) ///
	, legend(off) xlabel(-14(2)12) ylabel(-3(1)2, nogrid) ///
	xtitle("") ytitle("% change in probability of rain (day-2=0)") ///
	graphregion(color(white)) xsize(4.5)	
graph export "$Truck/outputs/Log/plots/division_100pct/rain.pdf", replace





*----------------------------------------------------------------------------
* figure A21, A22, A23

* The 500 placebo assignments are processed in 5 segments of 100 to speed things up;
* each segment writes to its own results file.
capture mkdir "EventData/placebo_v3"

* Build one empty results file per segment: 16 rows (event_time = -14, -12, ..., 16)
* and, for every assignment r in the segment, the columns n_r, dep_mean_r and coeff_r.
quietly forvalues v = 1/5 {
	clear
	set obs 16

	generate event_time = (_n-8)*2

	* segment v covers assignments 100*(v-1)+1 through 100*v
	local first = 1 + 100 * (`v'-1)
	local last = 100 + 100 * (`v'-1)
	forvalues r = `first'/`last' {
		foreach stub in n dep_mean coeff {
			generate `stub'_`r' = .
		}
	}

	save "$Truck/processed_data/EventData/placebo_v3/est_results_r`v'.dta", replace
}


* segment1
* Placebo assignments 1-100. For each assignment r:
*   1. expand every placebo inspection into a 29-day event window (-14..14)
*   2. merge in the true crash history
*   3. build the event-study regressors (2-day lead/lag bins)
*   4. run the event-study and the pooled post-inspection regressions
*   5. write the estimates into the r-th columns of est_results_r1.dta
qui foreach r of numlist 1/100 {

noi di "***** random assignment No.`r' ******"

* expand inspection
local varlist rand_insp_id insp_facility insp_unit_license insp_unit_license_state insp_unit_vehicle_id_number new_VIN first_insp1 last_insp1 rand_insp`r' rand_insp_time`r'

* the same expansion is done twice: rows with a VIN ("VIN") and rows with a blank VIN ("lic")
foreach id in VIN lic {

	use `varlist' using "InspectionData/placebo_v2/Insp_Truck_1pct_rand", clear

	if "`id'" == "VIN" {
		keep if insp_unit_vehicle_id_number ~= ""
	}
	else {
		keep if insp_unit_vehicle_id_number == ""
	}

	keep if insp_facility == "F"

	gen insp_year = year(rand_insp`r')
	gen insp_month = month(rand_insp`r')
	gen insp_day = day(rand_insp`r')

	* 29 copies of each inspection, numbered -14..14
	expand 29 
	bysort insp_unit_vehicle_id_number insp_unit_license insp_unit_license_state rand_insp_id rand_insp`r': gen event_time = _n - 15	

	assert inrange(event_time,-14,14)

	* calendar day of each row; the placebo inspection date is kept on the event_time == 0 row only
	gen date = mdy(insp_month,insp_day,insp_year)
	replace date = date + event_time
	replace rand_insp`r' = . if event_time ~= 0

	save "EventData/placebo_v3/insp_event_`id'_1pct_r`r'.dta", replace
}


* merge in true crash history
*VIN: match on VIN first, then fill the gaps with a match on license plate + state
use "EventData/placebo_v3/insp_event_VIN_1pct_r`r'.dta", clear
 
local varlist "report_date report_time crash_time crash"

merge m:1 date insp_unit_vehicle_id_number using "CrashData/Crash_allVIN", nogen keep(master match) keepusing(`varlist')

* park the VIN-matched values under a VIN suffix so the license merge can bring in the same names
foreach var of local varlist {
	rename `var' `var'VIN
}

merge m:1 date insp_unit_license insp_unit_license_state using "CrashData/Crash_alllic", nogen keep(master match) keepusing(`varlist')

foreach var of local varlist {
	replace `var'VIN = `var' if `var'VIN == .
}

drop `varlist'
foreach var of local varlist {
	rename `var'VIN `var'
}

compress
save "EventData/placebo_v3/Insp_Crash_Event_VIN_1pct_r`r'.dta", replace


*lic: match on license plate + state only
use "EventData/placebo_v3/insp_event_lic_1pct_r`r'.dta", clear

merge m:1 date insp_unit_license insp_unit_license_state using "CrashData/Crash_alllic", nogen keep(master match) keepusing(`varlist')

save "EventData/placebo_v3/Insp_Crash_Event_lic_1pct_r`r'.dta", replace

* append two datasets
use "EventData/placebo_v3/Insp_Crash_Event_lic_1pct_r`r'.dta",clear
append using "EventData/placebo_v3/Insp_Crash_Event_VIN_1pct_r`r'.dta"

gen inspection = 1 if event_time == 0
replace inspection = 0 if inspection == .
replace crash = 0 if crash == .


* gen dep & indep var
* rand_insp_time is split into hour (value/100) and minute (remainder)
gen insp_start_hr = floor(rand_insp_time`r'/100)
gen insp_start_min = rand_insp_time`r' - insp_start_hr * 100

* mdyhms() returns milliseconds since 01jan1960; a float would round it by up to
* about a minute, which matters for the 18-hour comparison with crash_time below
gen double insp_time = mdyhms(insp_month,insp_day,insp_year,insp_start_hr,insp_start_min,0)

drop insp_start_hr insp_start_min

gen post_insp = (event_time >= 0 )

gen year = year(date)
gen month = month(date)
gen dayofweek = dow(date)

compress
save "EventData/placebo_v3/Insp_Crash_Event_1pct_r`r'.dta", replace



* one id per (truck, placebo inspection, year) event window
gegen new_group_id = group(new_VIN rand_insp_id insp_year)
replace new_group_id = 0 if new_group_id == .
replace new_group_id = . if event_time == .


* outside event window insp indicator
gen insp_p = 0
gen insp_m = 0
replace insp_p = 1 if mdy(insp_month, insp_day, insp_year) > first_insp1
replace insp_m = 1 if mdy(insp_month, insp_day, insp_year) < last_insp1


* flag event windows with a crash in the 18 hours before the inspection
* (these windows are excluded in the regressions below)
foreach t of numlist 18 {
	* `t' hours expressed in milliseconds
	local hr_`t' = mdyhms(1,1,2000,`t',0,0)-mdyhms(1,1,2000,0,0,0)

	gen temp_crash_`t'hr = 1 if crash_time >= insp_time - `hr_`t'' & crash_time < insp_time
	bysort new_group_id: gegen crash_event_`t'hr = max(temp_crash_`t'hr)
}

drop temp_crash*


* generate inspection lead and lags
* the panel must have exactly one row per window and event day
bys new_group_id event_time: gen N=_N
assert N == 1
drop N

tsset new_group_id event_time

quietly forvalues i=1/14 { 
	gen byte F`i'insp=F`i'.inspection
	replace F`i'insp = 0 if F`i'insp == .

	gen byte L`i'insp=L`i'.inspection
	replace L`i'insp = 0 if L`i'insp == .
}


* 2-day bins
* combine into 2-day bins to increase power [(F14,F13)...(F2,F1),(0,L1),(L2,L3)...(L12,L13)
egen insp_m0_1=rowmax(inspection L1insp)
replace insp_m0_1=0 if insp_m0_1==.

quietly forvalues i=2(2)12 {
	local k=`i'+1
	egen insp_m`i'_`k'=rowmax(L`i'insp L`k'insp)
	replace insp_m`i'_`k'=0 if insp_m`i'_`k'==.
}

quietly forvalues i=1(2)13 {
	local k=`i'+1
	egen insp_p`i'_`k'=rowmax(F`i'insp F`k'insp)
	replace insp_p`i'_`k'=0 if insp_p`i'_`k'==.
}	
		

		
compress
save "$Truck/processed_data/EventData/placebo_v3/Insp_Crash_1pct_r`r'.dta", replace




noi di "regression"
* save regression results


use "$Truck/processed_data/EventData/placebo_v3/Insp_Crash_1pct_r`r'.dta", clear

* regressors: leads insp_p13_14 ... insp_p3_4, then lags insp_m0_1 ... insp_m12_13
* (the insp_p1_2 bin is left out of the list)
local insp_F
forvalues t = 14(-2)4{
	local s = `t'-1
	local insp_F `insp_F' insp_p`s'_`t'
}
forvalues t = 1(2)13 {
	local s = `t'-1
	local insp_F `insp_F' insp_m`s'_`t'
}

reghdfe crash `insp_F' insp_p insp_m if inrange(event_time,-14,13) & crash_event_18hr ~= 1, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN)  

	* store results
	quietly forvalues i=0(2)12 {
		local k=`i'+1
		local insp_m`i'_`k' = _b[insp_m`i'_`k']
	}

	quietly forvalues i=3(2)13 {
		local k=`i'+1
		local insp_p`i'_`k' = _b[insp_p`i'_`k']
	}	
			
reghdfe crash post_insp insp_p insp_m if inrange(event_time,-14,13) & crash_event_18hr ~= 1, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN)  

	local post_insp = _b[post_insp]
	local se_postinsp = _se[post_insp]
	

* mean of dep variable
summ crash if e(sample)
	
	* store results
	local dep_mean = r(mean)
	local n = e(N)


* write this assignment's estimates into the segment results file
use "$Truck/processed_data/EventData/placebo_v3/est_results_r1.dta", clear

* lag bin (i, i+1) goes to the row with event_time == i
forvalues i = 0(2)12 {

	local k=`i'+1

	replace coeff_`r' = `insp_m`i'_`k'' if event_time == `i'
}

* lead bin (i, i+1) goes to the row with event_time == -(i+1)
forvalues i=3(2)13 {

	local k=`i'+1

	replace coeff_`r' = `insp_p`i'_`k'' if event_time == -`k'
}

* rows 14 and 16 carry the pooled post_insp coefficient and its standard error
replace coeff_`r' = `post_insp' if event_time == 14
replace coeff_`r' = `se_postinsp' if event_time == 16

replace n_`r' = `n'
replace dep_mean_`r' = `dep_mean'

compress
save  "$Truck/processed_data/EventData/placebo_v3/est_results_r1.dta", replace

}




* segment2
* Placebo assignments 101-200. For each assignment r:
*   1. expand every placebo inspection into a 29-day event window (-14..14)
*   2. merge in the true crash history
*   3. build the event-study regressors (2-day lead/lag bins)
*   4. run the event-study and the pooled post-inspection regressions
*   5. write the estimates into the r-th columns of est_results_r2.dta
qui foreach r of numlist 101/200 {

noi di "***** random assignment No.`r' ******"

* expand inspection
local varlist rand_insp_id insp_facility insp_unit_license insp_unit_license_state insp_unit_vehicle_id_number new_VIN first_insp1 last_insp1 rand_insp`r' rand_insp_time`r'

* the same expansion is done twice: rows with a VIN ("VIN") and rows with a blank VIN ("lic")
foreach id in VIN lic {

	use `varlist' using "InspectionData/placebo_v2/Insp_Truck_1pct_rand", clear

	if "`id'" == "VIN" {
		keep if insp_unit_vehicle_id_number ~= ""
	}
	else {
		keep if insp_unit_vehicle_id_number == ""
	}

	keep if insp_facility == "F"

	gen insp_year = year(rand_insp`r')
	gen insp_month = month(rand_insp`r')
	gen insp_day = day(rand_insp`r')

	* 29 copies of each inspection, numbered -14..14
	expand 29 
	bysort insp_unit_vehicle_id_number insp_unit_license insp_unit_license_state rand_insp_id rand_insp`r': gen event_time = _n - 15	

	assert inrange(event_time,-14,14)

	* calendar day of each row; the placebo inspection date is kept on the event_time == 0 row only
	gen date = mdy(insp_month,insp_day,insp_year)
	replace date = date + event_time
	replace rand_insp`r' = . if event_time ~= 0

	save "EventData/placebo_v3/insp_event_`id'_1pct_r`r'.dta", replace
}


* merge in true crash history
*VIN: match on VIN first, then fill the gaps with a match on license plate + state
use "EventData/placebo_v3/insp_event_VIN_1pct_r`r'.dta", clear
 
local varlist "report_date report_time crash_time crash"

merge m:1 date insp_unit_vehicle_id_number using "CrashData/Crash_allVIN", nogen keep(master match) keepusing(`varlist')

* park the VIN-matched values under a VIN suffix so the license merge can bring in the same names
foreach var of local varlist {
	rename `var' `var'VIN
}

merge m:1 date insp_unit_license insp_unit_license_state using "CrashData/Crash_alllic", nogen keep(master match) keepusing(`varlist')

foreach var of local varlist {
	replace `var'VIN = `var' if `var'VIN == .
}

drop `varlist'
foreach var of local varlist {
	rename `var'VIN `var'
}

compress
save "EventData/placebo_v3/Insp_Crash_Event_VIN_1pct_r`r'.dta", replace


*lic: match on license plate + state only
use "EventData/placebo_v3/insp_event_lic_1pct_r`r'.dta", clear

merge m:1 date insp_unit_license insp_unit_license_state using "CrashData/Crash_alllic", nogen keep(master match) keepusing(`varlist')

save "EventData/placebo_v3/Insp_Crash_Event_lic_1pct_r`r'.dta", replace

* append two datasets
use "EventData/placebo_v3/Insp_Crash_Event_lic_1pct_r`r'.dta",clear
append using "EventData/placebo_v3/Insp_Crash_Event_VIN_1pct_r`r'.dta"

gen inspection = 1 if event_time == 0
replace inspection = 0 if inspection == .
replace crash = 0 if crash == .


* gen dep & indep var
* rand_insp_time is split into hour (value/100) and minute (remainder)
gen insp_start_hr = floor(rand_insp_time`r'/100)
gen insp_start_min = rand_insp_time`r' - insp_start_hr * 100

* mdyhms() returns milliseconds since 01jan1960; a float would round it by up to
* about a minute, which matters for the 18-hour comparison with crash_time below
gen double insp_time = mdyhms(insp_month,insp_day,insp_year,insp_start_hr,insp_start_min,0)

drop insp_start_hr insp_start_min

gen post_insp = (event_time >= 0 )

gen year = year(date)
gen month = month(date)
gen dayofweek = dow(date)

compress
save "EventData/placebo_v3/Insp_Crash_Event_1pct_r`r'.dta", replace



* one id per (truck, placebo inspection, year) event window
gegen new_group_id = group(new_VIN rand_insp_id insp_year)
replace new_group_id = 0 if new_group_id == .
replace new_group_id = . if event_time == .


* outside event window insp indicator
gen insp_p = 0
gen insp_m = 0
replace insp_p = 1 if mdy(insp_month, insp_day, insp_year) > first_insp1
replace insp_m = 1 if mdy(insp_month, insp_day, insp_year) < last_insp1


* flag event windows with a crash in the 18 hours before the inspection
* (these windows are excluded in the regressions below)
foreach t of numlist 18 {
	* `t' hours expressed in milliseconds
	local hr_`t' = mdyhms(1,1,2000,`t',0,0)-mdyhms(1,1,2000,0,0,0)

	gen temp_crash_`t'hr = 1 if crash_time >= insp_time - `hr_`t'' & crash_time < insp_time
	bysort new_group_id: gegen crash_event_`t'hr = max(temp_crash_`t'hr)
}

drop temp_crash*


* generate inspection lead and lags
* the panel must have exactly one row per window and event day
bys new_group_id event_time: gen N=_N
assert N == 1
drop N

tsset new_group_id event_time

quietly forvalues i=1/14 { 
	gen byte F`i'insp=F`i'.inspection
	replace F`i'insp = 0 if F`i'insp == .

	gen byte L`i'insp=L`i'.inspection
	replace L`i'insp = 0 if L`i'insp == .
}


* 2-day bins
* combine into 2-day bins to increase power [(F14,F13)...(F2,F1),(0,L1),(L2,L3)...(L12,L13)
egen insp_m0_1=rowmax(inspection L1insp)
replace insp_m0_1=0 if insp_m0_1==.

quietly forvalues i=2(2)12 {
	local k=`i'+1
	egen insp_m`i'_`k'=rowmax(L`i'insp L`k'insp)
	replace insp_m`i'_`k'=0 if insp_m`i'_`k'==.
}

quietly forvalues i=1(2)13 {
	local k=`i'+1
	egen insp_p`i'_`k'=rowmax(F`i'insp F`k'insp)
	replace insp_p`i'_`k'=0 if insp_p`i'_`k'==.
}	
		

		
compress
save "$Truck/processed_data/EventData/placebo_v3/Insp_Crash_1pct_r`r'.dta", replace




noi di "regression"
* save regression results


use "$Truck/processed_data/EventData/placebo_v3/Insp_Crash_1pct_r`r'.dta", clear

* regressors: leads insp_p13_14 ... insp_p3_4, then lags insp_m0_1 ... insp_m12_13
* (the insp_p1_2 bin is left out of the list)
local insp_F
forvalues t = 14(-2)4{
	local s = `t'-1
	local insp_F `insp_F' insp_p`s'_`t'
}
forvalues t = 1(2)13 {
	local s = `t'-1
	local insp_F `insp_F' insp_m`s'_`t'
}


reghdfe crash `insp_F' insp_p insp_m if inrange(event_time,-14,13) & crash_event_18hr ~= 1, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN)  

	* store results
	quietly forvalues i=0(2)12 {
		local k=`i'+1
		local insp_m`i'_`k' = _b[insp_m`i'_`k']
	}

	quietly forvalues i=3(2)13 {
		local k=`i'+1
		local insp_p`i'_`k' = _b[insp_p`i'_`k']
	}	
			
reghdfe crash post_insp insp_p insp_m if inrange(event_time,-14,13) & crash_event_18hr ~= 1, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN)  

	local post_insp = _b[post_insp]
	local se_postinsp = _se[post_insp]


* mean of dep variable
summ crash if e(sample)
	
	* store results
	local dep_mean = r(mean)
	local n = e(N)


* write this assignment's estimates into the segment results file
use "$Truck/processed_data/EventData/placebo_v3/est_results_r2.dta", clear

* lag bin (i, i+1) goes to the row with event_time == i
forvalues i = 0(2)12 {

	local k=`i'+1

	replace coeff_`r' = `insp_m`i'_`k'' if event_time == `i'
}

* lead bin (i, i+1) goes to the row with event_time == -(i+1)
forvalues i=3(2)13 {

	local k=`i'+1

	replace coeff_`r' = `insp_p`i'_`k'' if event_time == -`k'
}

* rows 14 and 16 carry the pooled post_insp coefficient and its standard error
replace coeff_`r' = `post_insp' if event_time == 14
replace coeff_`r' = `se_postinsp' if event_time == 16

replace n_`r' = `n'
replace dep_mean_`r' = `dep_mean'

compress
save  "$Truck/processed_data/EventData/placebo_v3/est_results_r2.dta", replace

}




* segment3
qui foreach r of numlist 201/300 {

noi di "***** random assignment No.`r' ******"

* expand inspection
local varlist rand_insp_id insp_facility insp_unit_license insp_unit_license_state insp_unit_vehicle_id_number new_VIN first_insp1 last_insp1 rand_insp`r' rand_insp_time`r'

use `varlist' using "InspectionData/placebo_v2/Insp_Truck_1pct_rand", clear

keep if insp_unit_vehicle_id_number ~= ""

keep if insp_facility == "F"


gen insp_year = year(rand_insp`r')
gen insp_month = month(rand_insp`r')
gen insp_day = day(rand_insp`r')

expand 29 
bysort insp_unit_vehicle_id_number insp_unit_license insp_unit_license_state rand_insp_id rand_insp`r': gen event_time = _n - 15	

assert inrange(event_time,-14,14)

gen date = mdy(insp_month,insp_day,insp_year)
replace date = date + event_time
replace rand_insp`r' = . if event_time ~= 0

save "EventData/placebo_v3/insp_event_VIN_1pct_r`r'.dta", replace




use `varlist' using "InspectionData/placebo_v2/Insp_Truck_1pct_rand", clear

keep if insp_unit_vehicle_id_number == ""

keep if insp_facility == "F"

gen insp_year = year(rand_insp`r')
gen insp_month = month(rand_insp`r')
gen insp_day = day(rand_insp`r')

expand 29 
bysort insp_unit_vehicle_id_number insp_unit_license insp_unit_license_state rand_insp_id rand_insp`r': gen event_time = _n - 15	

assert inrange(event_time,-14,14)

gen date = mdy(insp_month,insp_day,insp_year)
replace date = date + event_time
replace rand_insp`r' = . if event_time ~= 0

save "EventData/placebo_v3/insp_event_lic_1pct_r`r'.dta", replace





* merge in true crash history
*VIN
use "EventData/placebo_v3/insp_event_VIN_1pct_r`r'.dta", clear
 
local varlist "report_date report_time crash_time crash"

merge m:1 date insp_unit_vehicle_id_number using "CrashData/Crash_allVIN", nogen keep(master match) keepusing(`varlist')

foreach var of local varlist {
rename `var' `var'VIN
}

merge m:1 date insp_unit_license insp_unit_license_state using "CrashData/Crash_alllic", nogen keep(master match) keepusing(`varlist')

foreach var of local varlist {
replace `var'VIN = `var' if `var'VIN == .
}

drop `varlist'
foreach var of local varlist {
rename `var'VIN `var'
}

compress
save "EventData/placebo_v3/Insp_Crash_Event_VIN_1pct_r`r'.dta", replace


*lic
use "EventData/placebo_v3/insp_event_lic_1pct_r`r'.dta", clear

merge m:1 date insp_unit_license insp_unit_license_state using "CrashData/Crash_alllic", nogen keep(master match) keepusing(`varlist')

save "EventData/placebo_v3/Insp_Crash_Event_lic_1pct_r`r'.dta", replace

* append two datasets
use "EventData/placebo_v3/Insp_Crash_Event_lic_1pct_r`r'.dta",clear
append using "EventData/placebo_v3/Insp_Crash_Event_VIN_1pct_r`r'.dta"

gen inspection = 1 if event_time == 0
replace inspection = 0 if inspection == .
replace crash = 0 if crash == .


* gen dep & indep var
gen insp_start_hr = floor(rand_insp_time`r'/100)
gen insp_start_min = rand_insp_time`r' - insp_start_hr * 100

gen insp_time = mdyhms(insp_month,insp_day,insp_year,insp_start_hr,insp_start_min,0)

drop insp_start_hr insp_start_min

gen post_insp = (event_time >= 0 )

gen year = year(date)
gen month = month(date)
gen dayofweek = dow(date)

compress
save "EventData/placebo_v3/Insp_Crash_Event_1pct_r`r'.dta", replace



gegen new_group_id = group(new_VIN rand_insp_id insp_year)
replace new_group_id = 0 if new_group_id == .
replace new_group_id = . if event_time == .


* outside event window insp indicator
gen insp_p = 0
gen insp_m = 0
replace insp_p = 1 if mdy(insp_month, insp_day, insp_year) > first_insp1
replace insp_m = 1 if mdy(insp_month, insp_day, insp_year) < last_insp1


* drop events within 3,6,12,18,24 hours of inspection
foreach t of numlist 18 {
	local hr_`t' = mdyhms(1,1,2000,`t',0,0)-mdyhms(1,1,2000,0,0,0)

	gen temp_crash_`t'hr = 1 if crash_time >= insp_time - `hr_`t'' & crash_time < insp_time
	bysort new_group_id: gegen crash_event_`t'hr = max(temp_crash_`t'hr)
}

drop temp_crash*


* generate inspection lead and lags
bys new_group_id event_time: gen N=_N
assert N == 1
drop N

tsset new_group_id event_time

quietly forvalues i=1/14 { 
gen byte F`i'insp=F`i'.inspection
replace F`i'insp = 0 if F`i'insp == .

gen byte L`i'insp=L`i'.inspection
replace L`i'insp = 0 if L`i'insp == .
}


* 2-day bins
* combine into 2-day bins to increase power [(F14,F13)...(F2,F1),(0,L1),(L2,L3)...(L12,L13)
egen insp_m0_1=rowmax(inspection L1insp)
replace insp_m0_1=0 if insp_m0_1==.

quietly forvalues i=2(2)12 {
local k=`i'+1
egen insp_m`i'_`k'=rowmax(L`i'insp L`k'insp)
replace insp_m`i'_`k'=0 if insp_m`i'_`k'==.
}

quietly forvalues i=1(2)13 {
local k=`i'+1
egen insp_p`i'_`k'=rowmax(F`i'insp F`k'insp)
replace insp_p`i'_`k'=0 if insp_p`i'_`k'==.
}	
		

		
compress
save "$Truck/processed_data/EventData/placebo_v3/Insp_Crash_1pct_r`r'.dta", replace




noi di "regression"
* save regression results


use "$Truck/processed_data/EventData/placebo_v3/Insp_Crash_1pct_r`r'.dta", clear

local insp_F
forvalues t = 14(-2)4{
local s = `t'-1
local insp_F `insp_F' insp_p`s'_`t'
}
forvalues t = 1(2)13 {
local s = `t'-1
local insp_F `insp_F' insp_m`s'_`t'
}


reghdfe crash `insp_F' insp_p insp_m if inrange(event_time,-14,13) & crash_event_18hr ~= 1, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN)  

	* store results
	quietly forvalues i=0(2)12 {
	local k=`i'+1
	local insp_m`i'_`k' = _b[insp_m`i'_`k']
	}

	quietly forvalues i=3(2)13 {
	local k=`i'+1
	local insp_p`i'_`k' = _b[insp_p`i'_`k']
	}	
			
reghdfe crash post_insp insp_p insp_m if inrange(event_time,-14,13) & crash_event_18hr ~= 1, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN)  


	local post_insp = _b[post_insp]
	local se_postinsp = _se[post_insp]
	

* mean of dep variable
summ crash if e(sample)
	
	* store results
	local dep_mean = r(mean)
	local n = e(N)


use "$Truck/processed_data/EventData/placebo_v3/est_results_r3.dta", clear

forvalues i = 0(2)12 {

	local k=`i'+1

	replace coeff_`r' = `insp_m`i'_`k'' if event_time == `i'
}

forvalues i=3(2)13 {

	local k=`i'+1

	replace coeff_`r' = `insp_p`i'_`k'' if event_time == -`k'
}

replace coeff_`r' = `post_insp' if event_time == 14
replace coeff_`r' = `se_postinsp' if event_time == 16

replace n_`r' = `n'
replace dep_mean_`r' = `dep_mean'

compress
save  "$Truck/processed_data/EventData/placebo_v3/est_results_r3.dta", replace

}




* segment4
* Placebo iterations 301-400. For every random inspection-date assignment this loop
*   1. expands each fixed-facility inspection into a 29-day panel (event_time -14 to 14),
*   2. merges in the true crash history (by VIN, then by license plate),
*   3. builds the 2-day-bin lead/lag inspection indicators, and
*   4. runs the event-study and pooled post-inspection regressions and writes the
*      estimates into the coeff_, n_ and dep_mean_ columns of est_results_r4.dta.
* NOTE(review): insp_time is now stored as a double (Stata datetimes lose up to about a
* minute of precision in a float). The other segment loops should be changed the same
* way so all placebo iterations are built identically.
qui foreach r of numlist 301/400 {

	noi di "***** random assignment No.`r' ******"

	* expand inspection
	local varlist rand_insp_id insp_facility insp_unit_license insp_unit_license_state insp_unit_vehicle_id_number new_VIN first_insp1 last_insp1 rand_insp`r' rand_insp_time`r'

	* VIN file = units with a recorded VIN; lic file = units without one
	foreach grp in VIN lic {

		use `varlist' using "InspectionData/placebo_v2/Insp_Truck_1pct_rand", clear

		if "`grp'" == "VIN" {
			keep if insp_unit_vehicle_id_number ~= ""
		}
		else {
			keep if insp_unit_vehicle_id_number == ""
		}

		keep if insp_facility == "F"

		gen insp_year = year(rand_insp`r')
		gen insp_month = month(rand_insp`r')
		gen insp_day = day(rand_insp`r')

		* 29 copies per inspection, numbered -14 to 14 around the placebo date
		expand 29
		bysort insp_unit_vehicle_id_number insp_unit_license insp_unit_license_state rand_insp_id rand_insp`r': gen event_time = _n - 15

		assert inrange(event_time,-14,14)

		gen date = mdy(insp_month,insp_day,insp_year)
		replace date = date + event_time
		replace rand_insp`r' = . if event_time ~= 0

		save "EventData/placebo_v3/insp_event_`grp'_1pct_r`r'.dta", replace
	}


	* merge in true crash history
	local varlist "report_date report_time crash_time crash"

	* VIN: match on VIN first, then fill remaining gaps from the license-plate match
	use "EventData/placebo_v3/insp_event_VIN_1pct_r`r'.dta", clear

	merge m:1 date insp_unit_vehicle_id_number using "CrashData/Crash_allVIN", nogen keep(master match) keepusing(`varlist')

	foreach var of local varlist {
		rename `var' `var'VIN
	}

	merge m:1 date insp_unit_license insp_unit_license_state using "CrashData/Crash_alllic", nogen keep(master match) keepusing(`varlist')

	foreach var of local varlist {
		replace `var'VIN = `var' if `var'VIN == .
	}

	drop `varlist'
	foreach var of local varlist {
		rename `var'VIN `var'
	}

	compress
	save "EventData/placebo_v3/Insp_Crash_Event_VIN_1pct_r`r'.dta", replace

	* lic: license-plate match only
	use "EventData/placebo_v3/insp_event_lic_1pct_r`r'.dta", clear

	merge m:1 date insp_unit_license insp_unit_license_state using "CrashData/Crash_alllic", nogen keep(master match) keepusing(`varlist')

	save "EventData/placebo_v3/Insp_Crash_Event_lic_1pct_r`r'.dta", replace

	* append two datasets (the lic data just saved is still in memory)
	append using "EventData/placebo_v3/Insp_Crash_Event_VIN_1pct_r`r'.dta"

	gen inspection = (event_time == 0)
	replace crash = 0 if crash == .


	* gen dep & indep var
	* rand_insp_time is read as an hhmm number: split it into hour and minute
	gen insp_start_hr = floor(rand_insp_time`r'/100)
	gen insp_start_min = rand_insp_time`r' - insp_start_hr * 100

	* datetime values are milliseconds since 1960 and must be stored as double
	gen double insp_time = mdyhms(insp_month,insp_day,insp_year,insp_start_hr,insp_start_min,0)

	drop insp_start_hr insp_start_min

	gen post_insp = (event_time >= 0 )

	gen year = year(date)
	gen month = month(date)
	gen dayofweek = dow(date)

	compress
	save "EventData/placebo_v3/Insp_Crash_Event_1pct_r`r'.dta", replace


	* one id per event window
	gegen new_group_id = group(new_VIN rand_insp_id insp_year)
	replace new_group_id = 0 if new_group_id == .
	replace new_group_id = . if event_time == .


	* outside event window insp indicator
	gen insp_p = (mdy(insp_month, insp_day, insp_year) > first_insp1)
	gen insp_m = (mdy(insp_month, insp_day, insp_year) < last_insp1)


	* flag event windows with a crash in the 18 hours before the inspection
	foreach t of numlist 18 {
		* window length in milliseconds
		local hr_`t' = mdyhms(1,1,2000,`t',0,0)-mdyhms(1,1,2000,0,0,0)

		gen temp_crash_`t'hr = 1 if crash_time >= insp_time - `hr_`t'' & crash_time < insp_time
		bysort new_group_id: gegen crash_event_`t'hr = max(temp_crash_`t'hr)
	}

	drop temp_crash*


	* generate inspection lead and lags
	* the panel must have exactly one row per group and event day
	bys new_group_id event_time: gen N=_N
	assert N == 1
	drop N

	tsset new_group_id event_time

	forvalues i=1/14 {
		gen byte F`i'insp=F`i'.inspection
		replace F`i'insp = 0 if F`i'insp == .

		gen byte L`i'insp=L`i'.inspection
		replace L`i'insp = 0 if L`i'insp == .
	}


	* 2-day bins
	* combine into 2-day bins to increase power: (F14,F13) ... (F2,F1), (0,L1), (L2,L3) ... (L12,L13)
	egen insp_m0_1=rowmax(inspection L1insp)
	replace insp_m0_1=0 if insp_m0_1==.

	forvalues i=2(2)12 {
		local k=`i'+1
		egen insp_m`i'_`k'=rowmax(L`i'insp L`k'insp)
		replace insp_m`i'_`k'=0 if insp_m`i'_`k'==.
	}

	forvalues i=1(2)13 {
		local k=`i'+1
		egen insp_p`i'_`k'=rowmax(F`i'insp F`k'insp)
		replace insp_p`i'_`k'=0 if insp_p`i'_`k'==.
	}


	compress
	save "$Truck/processed_data/EventData/placebo_v3/Insp_Crash_1pct_r`r'.dta", replace


	noi di "regression"
	* save regression results (the dataset just saved is still in memory)

	* regressor list: lead bins 13_14 down to 3_4, then lag bins 0_1 up to 12_13;
	* the 1_2 lead bin is left out as the reference period
	local insp_F
	forvalues t = 14(-2)4{
		local s = `t'-1
		local insp_F `insp_F' insp_p`s'_`t'
	}
	forvalues t = 1(2)13 {
		local s = `t'-1
		local insp_F `insp_F' insp_m`s'_`t'
	}


	reghdfe crash `insp_F' insp_p insp_m if inrange(event_time,-14,13) & crash_event_18hr ~= 1, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN)

	* store results
	forvalues i=0(2)12 {
		local k=`i'+1
		local insp_m`i'_`k' = _b[insp_m`i'_`k']
	}

	forvalues i=3(2)13 {
		local k=`i'+1
		local insp_p`i'_`k' = _b[insp_p`i'_`k']
	}

	reghdfe crash post_insp insp_p insp_m if inrange(event_time,-14,13) & crash_event_18hr ~= 1, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN)

	local post_insp = _b[post_insp]
	local se_postinsp = _se[post_insp]


	* mean of dep variable
	summ crash if e(sample)

	* store results
	local dep_mean = r(mean)
	local n = e(N)


	* write this iteration's estimates into the segment results file:
	* lag bins at event_time 0..12, lead bins at -4..-14,
	* pooled post coefficient at 14 and its standard error at 16
	use "$Truck/processed_data/EventData/placebo_v3/est_results_r4.dta", clear

	forvalues i = 0(2)12 {
		local k=`i'+1
		replace coeff_`r' = `insp_m`i'_`k'' if event_time == `i'
	}

	forvalues i=3(2)13 {
		local k=`i'+1
		replace coeff_`r' = `insp_p`i'_`k'' if event_time == -`k'
	}

	replace coeff_`r' = `post_insp' if event_time == 14
	replace coeff_`r' = `se_postinsp' if event_time == 16

	replace n_`r' = `n'
	replace dep_mean_`r' = `dep_mean'

	compress
	save  "$Truck/processed_data/EventData/placebo_v3/est_results_r4.dta", replace

}




* segment5
* Placebo iterations 401-500. For every random inspection-date assignment this loop
*   1. expands each fixed-facility inspection into a 29-day panel (event_time -14 to 14),
*   2. merges in the true crash history (by VIN, then by license plate),
*   3. builds the 2-day-bin lead/lag inspection indicators, and
*   4. runs the event-study and pooled post-inspection regressions and writes the
*      estimates into the coeff_, n_ and dep_mean_ columns of est_results_r5.dta.
* NOTE(review): insp_time is now stored as a double (Stata datetimes lose up to about a
* minute of precision in a float). The other segment loops should be changed the same
* way so all placebo iterations are built identically.
qui foreach r of numlist 401/500 {

	noi di "***** random assignment No.`r' ******"

	* expand inspection
	local varlist rand_insp_id insp_facility insp_unit_license insp_unit_license_state insp_unit_vehicle_id_number new_VIN first_insp1 last_insp1 rand_insp`r' rand_insp_time`r'

	* VIN file = units with a recorded VIN; lic file = units without one
	foreach grp in VIN lic {

		use `varlist' using "InspectionData/placebo_v2/Insp_Truck_1pct_rand", clear

		if "`grp'" == "VIN" {
			keep if insp_unit_vehicle_id_number ~= ""
		}
		else {
			keep if insp_unit_vehicle_id_number == ""
		}

		keep if insp_facility == "F"

		gen insp_year = year(rand_insp`r')
		gen insp_month = month(rand_insp`r')
		gen insp_day = day(rand_insp`r')

		* 29 copies per inspection, numbered -14 to 14 around the placebo date
		expand 29
		bysort insp_unit_vehicle_id_number insp_unit_license insp_unit_license_state rand_insp_id rand_insp`r': gen event_time = _n - 15

		assert inrange(event_time,-14,14)

		gen date = mdy(insp_month,insp_day,insp_year)
		replace date = date + event_time
		replace rand_insp`r' = . if event_time ~= 0

		save "EventData/placebo_v3/insp_event_`grp'_1pct_r`r'.dta", replace
	}


	* merge in true crash history
	local varlist "report_date report_time crash_time crash"

	* VIN: match on VIN first, then fill remaining gaps from the license-plate match
	use "EventData/placebo_v3/insp_event_VIN_1pct_r`r'.dta", clear

	merge m:1 date insp_unit_vehicle_id_number using "CrashData/Crash_allVIN", nogen keep(master match) keepusing(`varlist')

	foreach var of local varlist {
		rename `var' `var'VIN
	}

	merge m:1 date insp_unit_license insp_unit_license_state using "CrashData/Crash_alllic", nogen keep(master match) keepusing(`varlist')

	foreach var of local varlist {
		replace `var'VIN = `var' if `var'VIN == .
	}

	drop `varlist'
	foreach var of local varlist {
		rename `var'VIN `var'
	}

	compress
	save "EventData/placebo_v3/Insp_Crash_Event_VIN_1pct_r`r'.dta", replace

	* lic: license-plate match only
	use "EventData/placebo_v3/insp_event_lic_1pct_r`r'.dta", clear

	merge m:1 date insp_unit_license insp_unit_license_state using "CrashData/Crash_alllic", nogen keep(master match) keepusing(`varlist')

	save "EventData/placebo_v3/Insp_Crash_Event_lic_1pct_r`r'.dta", replace

	* append two datasets (the lic data just saved is still in memory)
	append using "EventData/placebo_v3/Insp_Crash_Event_VIN_1pct_r`r'.dta"

	gen inspection = (event_time == 0)
	replace crash = 0 if crash == .


	* gen dep & indep var
	* rand_insp_time is read as an hhmm number: split it into hour and minute
	gen insp_start_hr = floor(rand_insp_time`r'/100)
	gen insp_start_min = rand_insp_time`r' - insp_start_hr * 100

	* datetime values are milliseconds since 1960 and must be stored as double
	gen double insp_time = mdyhms(insp_month,insp_day,insp_year,insp_start_hr,insp_start_min,0)

	drop insp_start_hr insp_start_min

	gen post_insp = (event_time >= 0 )

	gen year = year(date)
	gen month = month(date)
	gen dayofweek = dow(date)

	compress
	save "EventData/placebo_v3/Insp_Crash_Event_1pct_r`r'.dta", replace


	* one id per event window
	gegen new_group_id = group(new_VIN rand_insp_id insp_year)
	replace new_group_id = 0 if new_group_id == .
	replace new_group_id = . if event_time == .


	* outside event window insp indicator
	gen insp_p = (mdy(insp_month, insp_day, insp_year) > first_insp1)
	gen insp_m = (mdy(insp_month, insp_day, insp_year) < last_insp1)


	* flag event windows with a crash in the 18 hours before the inspection
	foreach t of numlist 18 {
		* window length in milliseconds
		local hr_`t' = mdyhms(1,1,2000,`t',0,0)-mdyhms(1,1,2000,0,0,0)

		gen temp_crash_`t'hr = 1 if crash_time >= insp_time - `hr_`t'' & crash_time < insp_time
		bysort new_group_id: gegen crash_event_`t'hr = max(temp_crash_`t'hr)
	}

	drop temp_crash*


	* generate inspection lead and lags
	* the panel must have exactly one row per group and event day
	bys new_group_id event_time: gen N=_N
	assert N == 1
	drop N

	tsset new_group_id event_time

	forvalues i=1/14 {
		gen byte F`i'insp=F`i'.inspection
		replace F`i'insp = 0 if F`i'insp == .

		gen byte L`i'insp=L`i'.inspection
		replace L`i'insp = 0 if L`i'insp == .
	}


	* 2-day bins
	* combine into 2-day bins to increase power: (F14,F13) ... (F2,F1), (0,L1), (L2,L3) ... (L12,L13)
	egen insp_m0_1=rowmax(inspection L1insp)
	replace insp_m0_1=0 if insp_m0_1==.

	forvalues i=2(2)12 {
		local k=`i'+1
		egen insp_m`i'_`k'=rowmax(L`i'insp L`k'insp)
		replace insp_m`i'_`k'=0 if insp_m`i'_`k'==.
	}

	forvalues i=1(2)13 {
		local k=`i'+1
		egen insp_p`i'_`k'=rowmax(F`i'insp F`k'insp)
		replace insp_p`i'_`k'=0 if insp_p`i'_`k'==.
	}


	compress
	save "$Truck/processed_data/EventData/placebo_v3/Insp_Crash_1pct_r`r'.dta", replace


	noi di "regression"
	* save regression results (the dataset just saved is still in memory)

	* regressor list: lead bins 13_14 down to 3_4, then lag bins 0_1 up to 12_13;
	* the 1_2 lead bin is left out as the reference period
	local insp_F
	forvalues t = 14(-2)4{
		local s = `t'-1
		local insp_F `insp_F' insp_p`s'_`t'
	}
	forvalues t = 1(2)13 {
		local s = `t'-1
		local insp_F `insp_F' insp_m`s'_`t'
	}


	reghdfe crash `insp_F' insp_p insp_m if inrange(event_time,-14,13) & crash_event_18hr ~= 1, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN)

	* store results
	forvalues i=0(2)12 {
		local k=`i'+1
		local insp_m`i'_`k' = _b[insp_m`i'_`k']
	}

	forvalues i=3(2)13 {
		local k=`i'+1
		local insp_p`i'_`k' = _b[insp_p`i'_`k']
	}

	reghdfe crash post_insp insp_p insp_m if inrange(event_time,-14,13) & crash_event_18hr ~= 1, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN)

	local post_insp = _b[post_insp]
	local se_postinsp = _se[post_insp]


	* mean of dep variable
	summ crash if e(sample)

	* store results
	local dep_mean = r(mean)
	local n = e(N)


	* write this iteration's estimates into the segment results file:
	* lag bins at event_time 0..12, lead bins at -4..-14,
	* pooled post coefficient at 14 and its standard error at 16
	use "$Truck/processed_data/EventData/placebo_v3/est_results_r5.dta", clear

	forvalues i = 0(2)12 {
		local k=`i'+1
		replace coeff_`r' = `insp_m`i'_`k'' if event_time == `i'
	}

	forvalues i=3(2)13 {
		local k=`i'+1
		replace coeff_`r' = `insp_p`i'_`k'' if event_time == -`k'
	}

	replace coeff_`r' = `post_insp' if event_time == 14
	replace coeff_`r' = `se_postinsp' if event_time == 16

	replace n_`r' = `n'
	replace dep_mean_`r' = `dep_mean'

	compress
	save  "$Truck/processed_data/EventData/placebo_v3/est_results_r5.dta", replace

}



* combine 5 segments together, plot
* Start from the first segment's results and attach segments 2-5 column-wise;
* every segment file must contain exactly the same event_time rows.
use "$Truck/processed_data/EventData/placebo_v3/est_results_r1.dta", clear

forvalues seg = 2/5 {
	fmerge 1:1 event_time using "$Truck/processed_data/EventData/placebo_v3/est_results_r`seg'.dta", assert(match) nogen
}

* wide (one column set per iteration) to long (one row per event_time and iteration)
reshape long n_ dep_mean_ coeff_, i(event_time) j(iteration)

compress
* replace added so the script can be re-run without aborting on the existing file
save "$Truck/processed_data/EventData/placebo_v3/est_results.dta", replace



* plots
* Placebo coefficients (long format) together with the real estimates.

use "$Truck/processed_data/EventData/placebo_v3/est_results.dta", clear

* cells that were never filled count as zero
replace coeff_ = 0 if coeff_ == .

fmerge m:1 event_time using "$Truck/processed_data/EventData/placebo/real_est_results.dta", nogen

summ n_ dep_mean_

* average placebo coefficient per event day
bys event_time: egen coeff_avg = mean(coeff_)

* placebo quantiles per event day: 5,10,25,50,75,90,95
foreach q of numlist 5 10 25 50 75 90 95 {
	gen coeff_q`q' = .
	forvalues day = -14(2)12 {
		summ coeff_ if event_time == `day', detail
		replace coeff_q`q' = r(p`q') if event_time == `day'
	}
}

* express every coefficient series as a percent of the dependent-variable mean,
* then rescale the level series by 10^5
foreach var of varlist coeff* {
	gen pct_`var' = `var' / dep_mean_ *100
	replace `var' = `var' * 10^5
}

* the real estimates use a fixed baseline of 6.39 (already on the 10^5 scale)
foreach realvar in coeff_real coeff_real_lb coeff_real_ub {
	replace pct_`realvar' = `realvar' / 6.39 *100
}


* figure A22
* 95%CI range plot: 2.5th to 97.5th percentile of the placebo distribution
bys event_time: egen pct_coeff_9750 = pctile(pct_coeff_) , p(97.5)
bys event_time: egen pct_coeff_0250 = pctile(pct_coeff_) , p(2.5)

twoway ///
	(rarea pct_coeff_9750 pct_coeff_0250 event_time, fcolor(gs12%30) lcolor(bg)) ///
	(rarea pct_coeff_real_lb pct_coeff_real_ub event_time if iteration == 1 & event_time <= -4, color(red%20) lcolor(bg)) ///
	(rarea pct_coeff_real_lb pct_coeff_real_ub event_time if iteration == 1 & event_time >= 0, color(red%20) lcolor(bg)) ///
	(line pct_coeff_avg event_time if iteration == 1, lcolor(black) lpattern(shortdash) lwidth(0.3)) ///
	(line pct_coeff_real event_time if iteration == 1, lcolor(red) lpattern(solid) lwidth(0.4)) ///
	if inrange(event_time,-14,12), ///
	legend(off) xlabel(-14(2)12) ylabel(,nogrid) ///
	xtitle("Days") ytitle("% change in crash rate (day-2=0)") ///
	text(57 4 "observed", place(c) color(forest_green) size(9pt)) ///
	text(0 4 "placebos", place(c) color(black) size(9pt)) ///
	graphregion(color(white)) xsize(4.5)
graph export "$Truck/outputs/Log/EventPlots/placebo/event_study1.pdf", replace

	
	
	

* figure A21: post_insp coeff histogram
* Rows with event_time == 14 hold each iteration's pooled post-inspection coefficient.
* The histogram shows the placebo distribution, the real estimate (solid red line)
* and the 2.5th / 97.5th placebo percentiles (dashed lines).

* real pooled estimate as a percent of the mean crash rate (hard-coded values)
replace pct_coeff_real = 0.0000278 / 0.0000639*100 if event_time == 14

gen pct_coeff_post = pct_coeff_ if event_time == 14
egen post_insp_9750 = pctile(pct_coeff_post) if event_time == 14, p(97.5)
egen post_insp_0250 = pctile(pct_coeff_post) if event_time == 14, p(2.5)

* the percentile variables are constant over the event_time == 14 rows, so their
* mean recovers the cutoff value
summ post_insp_9750
local post_insp_9750 = r(mean)

summ post_insp_0250
local post_insp_0250 = r(mean)

summ pct_coeff_real if event_time == 14
local pct_coeff_real = r(mean)


tw  (hist pct_coeff_ if event_time == 14, fcolor(gs12%50) lcolor(gs4) lw(.1) la(center)) ///
	(scatteri 0 `pct_coeff_real' .09 `pct_coeff_real', recast(line) lp(solid) lcolor(red) lwidth(0.6)) ///
	(scatteri 0 `post_insp_9750' .09 `post_insp_9750', recast(line) lp(shortdash) lcolor(gs2) lwidth(0.4)) ///
	(scatteri 0 `post_insp_0250' .09 `post_insp_0250', recast(line) lp(shortdash) lcolor(gs2) lwidth(0.4)), ///
	 legend(off) ylabel(0(.02).09, nogrid) xlabel(-20(20)50) ///
	xtitle("Placebo Effect Size in %") ytitle("Density") ///
	graphregion(color(white))
graph export "$Truck/outputs/Log/EventPlots/placebo/hist_post.pdf", replace
	
	
	


* figure A23: t-stat plot
* For each placebo iteration the pooled post-inspection coefficient sits in the
* event_time == 14 row and its standard error in the event_time == 16 row.

use "$Truck/processed_data/EventData/placebo_v3/est_results.dta", clear

replace coeff_ = 0 if coeff_ == .

* spread the post-inspection coefficient to every row of its iteration
bys iteration: egen coeff_post = max(cond(event_time == 14, coeff_, .))

* t = coefficient / standard error, defined on the standard-error row only
gen t_stat = coeff_post / coeff_ if event_time == 16


twoway ///
	(hist t_stat, fcolor(gs12%50) lcolor(gs4) lw(.1) la(center)) ///
	(kdensity t_stat, lcolor(black) lw(.3)) ///
	(normal t_stat, lcolor(gs4) lpattern(dash)), ///
	legend(off) ylabel(0(.1).4, nogrid) xlabel(-4(2)4) ///
	xtitle("Post-inspection estimate t-statistics distribution") ytitle("Density") ///
	graphregion(color(white))
graph export "$Truck/outputs/Log/EventPlots/placebo/hist_tstat.pdf", replace
	
	
	

* erase temp files
* Best-effort cleanup of the per-iteration intermediate files. capture keeps the
* loop going when a file is already gone (for example after a partial re-run),
* where a bare erase would abort and leave the remaining files behind.
foreach r of numlist 1/500 {
	foreach stub in insp_event_VIN insp_event_lic Insp_Crash_Event_lic Insp_Crash_Event_VIN Insp_Crash_Event {
		capture erase "EventData/placebo_v3/`stub'_1pct_r`r'.dta"
	}
}


*--------------------------------------------------------------------------
* figure a24 see "2_fig_tab.do"




*----------------------------------------------------------------------------
* table A1
* Pooled post-inspection regressions with date fixed effects, after dropping
* inspections on enforcement-campaign days and holidays, without and with
* controls for inspection duration and weather.

* load outcome analysis file
local insp_list new_VIN new_group_id year month dayofweek post_insp flag_oos insp_m insp_p inspection inspection_id insp_month insp_day insp_year date insp_end_time insp_time event_time insp_countyfips
local crash_list crash crash_event_18hr event_id1 event_id2 event_id3 event_id4 vehicles_in_accident

use `insp_list' `crash_list' using "$Truck/processed_data/EventData/division_100pct/Insp_Crash_100pct_outcome.dta" if inrange(event_time,-14,13) & crash_event_18hr ~=1 & flag_oos ~=1 , clear

*** drop crash == 2 
bys new_group_id: gegen crash_max = max(crash)
summ crash_max 
drop if crash_max == 2  //34 groups
drop crash_max

*** drop special days 
if 1 {

gen insp_dow = dow(mdy(insp_month,insp_day,insp_year))

* 1. roadcheck 72 hours: starts on the Tuesday falling on June 2-8
gen roadcheck_day = 1 if insp_month == 6 & insp_day <= 8 & insp_day > 1 & insp_dow == 2

gen date_roadcheck_day = date if roadcheck_day == 1
bysort insp_year: egen date_roadcheck = max(date_roadcheck_day)

replace roadcheck_day = 1 if date == date_roadcheck + 1 
replace roadcheck_day = 1 if date == date_roadcheck + 2

* 2. brake week: seven days starting on the Sunday falling on September 6-12
gen brakeweek_day = 1 if insp_month == 9 & insp_day > 5 & insp_day <= 12 & insp_dow == 0

gen date_brakeweek_day = date if brakeweek_day == 1
bysort insp_year: gegen date_brakeweek = max(date_brakeweek_day)
* 2017 start date is set by hand
replace date_brakeweek = mdy(9,7,2017) if insp_year == 2017

replace brakeweek_day = 1 if inrange(date, date_brakeweek, date_brakeweek+6)

* 3. brake day: the Wednesday falling on May 1-7
gen brakecheck_day = 1 if insp_month == 5 & insp_day <= 7 & insp_dow == 3

* 4. holidays 
gen holiday = 0
replace holiday = 1 if insp_month == 1 & insp_day == 1 // New Year
replace holiday = 1 if insp_month == 1 & insp_dow == 1 & insp_day >= 15 & insp_day <= 21  // MLK (3rd Monday of January)
replace holiday = 1 if insp_month == 2 & insp_dow == 1 & insp_day >= 15 & insp_day <= 21  // Washington's Birthday (3rd Monday of February)
replace holiday = 1 if insp_month == 5 & insp_dow == 1 & insp_day >= 25 & insp_day <= 31 // Memorial Day (last Monday of May)
replace holiday = 1 if insp_month == 7 & insp_day==4 // Independence Day
replace holiday = 1 if insp_month == 9 & insp_dow == 1 & insp_day >= 1 & insp_day <= 7 // Labor Day (1st Monday of September)
replace holiday = 1 if insp_month == 10 & insp_dow == 1 & insp_day >= 8 & insp_day <= 14 // Columbus Day (2nd Monday of October)
replace holiday = 1 if insp_month == 11 & insp_day == 11 // Veterans Day
replace holiday = 1 if insp_month == 11 & insp_dow == 4 & insp_day >= 22 & insp_day <= 28 // Thanksgiving (4th Thursday of November)
replace holiday = 1 if insp_month == 11 & insp_dow == 5 & insp_day >= 23 & insp_day <= 29 // day after Thanksgiving
replace holiday = 1 if insp_month == 12 & insp_day == 24 // Christmas Eve
replace holiday = 1 if insp_month == 12 & insp_day == 25 // Christmas Day

* drop special event inspections + holiday
drop if roadcheck_day == 1 | brakeweek_day == 1 | brakecheck_day == 1 | holiday == 1

}


*** duration of inspection
* insp_end_time is read as an hhmm number: split it into hour and minute
gen end_h = floor(insp_end_time/100)
gen end_m = insp_end_time - end_h * 100

* datetime values are milliseconds since 1960 and must be stored as double;
* a float would round them by up to about a minute
gen double end_time = mdyhms(insp_month,insp_day,insp_year,end_h,end_m,0)

gen double duration = end_time - insp_time
replace duration = . if duration < 0

*** merge in weather data
* the weather file is keyed on a numeric yyyymmdd date, so build one temporarily
gen day = day(date)

gen str4 year_str = string(year, "%04.0f")
gen str2 month_str = string(month, "%02.0f")
gen str2 day_str = string(day, "%02.0f")

rename date sys_date

gen date = year_str + month_str + day_str

destring date, replace
format date %12.0g 

rename insp_countyfips countyfips

merge m:1 countyfips date using "$Truck/processed_data/WeatherData/snowrain.dta", keep(master match) keepusing(PRCP SNOW) nogen 
	// merged 90% of master (only conti US)

*drop if date > 20180911 // GHCN Daily weather data avail before 20180911
* tab state if state_ab == "" 

gen prcp_ind = (PRCP>0)
gen snow_ind = (SNOW>0)

* no observation
replace prcp_ind = . if PRCP == .
replace snow_ind = . if SNOW == .

* rename date back 
rename date date_str 
rename sys_date date 

*** keep analysis sample 
* crash_single and crash_multi were removed from this list: they are neither loaded
* nor created in this section, so keeping them stopped the script with r(111)
keep crash post_insp duration prcp_ind snow_ind insp_p insp_m new_VIN date year month dayofweek


* nosample is not used here: it stops reghdfe from storing e(sample), which would
* leave the summ commands below with zero observations
reghdfe crash post_insp insp_p insp_m, absorb(new_VIN i.date) vce(cluster new_VIN) poolsize(5) compact

summ crash if e(sample)

reghdfe crash post_insp duration prcp_ind snow_ind insp_p insp_m, absorb(new_VIN i.date) vce(cluster new_VIN) poolsize(5) compact

summ crash if e(sample)





*-----------------------------------------------------------------------------------------
* table a2 
* Post-inspection effect with truck traffic counts as a control and as an
* interaction, for years 2012 onward.

local varlist "count_added truck_count crash post_insp insp_p insp_m new_VIN year month dayofweek countyfips flag_oos event_time crash_event_18hr"

use `varlist' using "$Truck/Insp_Crash_100pct_traff.dta" ///
	if inrange(event_time,-14,13) & crash_event_18hr != 1 & flag_oos != 1 & year >= 2012, clear

* drop outlier: anything above the 99th percentile of count_added
quietly summarize count_added, detail
drop if count_added > r(p99)

* a truck count of zero is treated as not observed
replace truck_count = . if truck_count == 0

* insp county: convert to numeric so it can be absorbed
destring countyfips, force replace

* baseline during the same years for the same sample (rows with a truck count)
reghdfe crash post_insp insp_p insp_m if truck_count != ., ///
	absorb(new_VIN year month dayofweek countyfips) vce(cluster new_VIN)

summ crash if e(sample)

* traffic count as a control
reghdfe crash post_insp truck_count insp_p insp_m, ///
	absorb(new_VIN year month dayofweek countyfips) vce(cluster new_VIN)

* traffic count interacted with the post-inspection indicator
reghdfe crash post_insp##c.truck_count insp_p insp_m, ///
	absorb(new_VIN year month dayofweek countyfips) vce(cluster new_VIN)





*-------------------------------------------------------------------------------------------------
* table A3: non-injury and fatalities 
* (1) outcome = crashes with no injuries and no fatalities;
* (2) all crashes, excluding event windows that contain an injury or fatal crash.
* NOTE(review): unlike the other tables built from this file, no restriction to
* inrange(event_time,-14,13), crash_event_18hr ~= 1 or flag_oos ~= 1 is applied
* here. Confirm whether that is intended.
use "$Truck/processed_data/EventData/division_100pct/Insp_Crash_100pct_outcome.dta",clear

* crash indicator kept only when nobody was injured or killed, zero otherwise
gen no_inj_crash = crash if injuries == 0 & fatalities == 0
replace no_inj_crash = 0 if no_inj_crash == .

* nosample is not used here: it stops reghdfe from storing e(sample), which would
* leave the summ below with zero observations
reghdfe no_inj_crash post_insp insp_p insp_m, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN) poolsize(5) compact

summ no_inj_crash if e(sample)

* drop trucks that ever had a serious crash
keep crash insp_p insp_m event_time new_VIN year month dayofweek post_insp injuries fatalities new_group_id

gen serious_crash = 1 if injuries > 0 & injuries ~= .
replace serious_crash = 1 if fatalities > 0 & fatalities ~= .

* flag the whole event window when any of its rows has a serious crash
bysort new_group_id: gegen serious_crash_gp = max(serious_crash)



reghdfe crash post_insp insp_p insp_m if serious_crash_gp ~= 1, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN)  

summ crash if e(sample)




*-----------------------------------------------------------------------------------
* Table A5
* calculate re-insp across stateXyear as policy index:
* the share of inspections that follow the truck's previous inspection within
* 30, 60, 90 or 180 days, by inspection state and year
use "$Truck/processed_data/InspectionData/Insp_by_year/temp/Insp_100pct_VIN.dta", clear

* only fixed station
keep if insp_facility == "F"

* drop impossible fips
gen insp_statefips = substr(insp_countyfips,1,2)
drop if insp_state == "US"
drop if inlist(insp_statefips, "03", "07", "14", "43", "52")

* look at the date to the most recent inspection:
* order each truck's inspections and number them so the panel can be tsset
sort new_VIN insp_date insp_start_time
gegen seq_insp = group(insp_date insp_start_time inspection_id)
bysort new_VIN: egen rank_insp = rank(seq_insp)

*replace rank_insp = floor(rank_insp/1)  // truck + trailor (share the same license)
*duplicates drop new_VIN rank_insp, force

tsset new_VIN rank_insp

* days since the truck's previous inspection
gen D_insp_date = date - L.date

drop if D_insp_date == 0  // drop inspection twice a day
drop if D_insp_date == .

* probability of another inspection in a quarter = D_insp_date < 90 days
foreach d of numlist 30 60 90 180 {
	gen Ddate_`d' = (D_insp_date < `d')
}

* pct of inspections within 90 days
gcollapse (sum) inspection Ddate_30 Ddate_60 Ddate_90 Ddate_180, by(insp_statefips insp_year)

foreach d of numlist 30 60 90 180 {
	gen pct`d' = Ddate_`d' / inspection
}

rename (insp_year insp_statefips) (year state)

destring state, force replace

compress
save "$Truck/processed_data/InspectionData/Insp_by_year/new/reinsp_st_yr2.dta", replace

* merge onto main file for regressions
use crash insp_p insp_m event_time new_VIN year month dayofweek post_insp insp_countyfips flag_oos crash_event_18hr ///
	using "$Truck/processed_data/EventData/division_100pct/Insp_Crash_100pct_outcome.dta" ///
	if inrange(event_time,-14,13) & crash_event_18hr != 1 & flag_oos != 1, clear

* numeric state code taken from the first two digits of the county fips
gen insp_statefips = substr(insp_countyfips, 1,2)
destring insp_statefips, gen(state)

* probability of another inspection in a quarter = D_insp_date < 90 days
fmerge m:1 state year using "$Truck/processed_data/InspectionData/Insp_by_year/new/reinsp_st_yr2.dta", keep(master match) keepusing(Ddate* pct*) nogen

* interactions of the post-inspection indicator with each re-inspection share
local varlist "pct60 pct90 pct180"
foreach var of local varlist {
	gen I_post_`var' = post_insp * `var'
}

* without, then with, state fixed effects
reghdfe crash post_insp pct90 I_post_pct90 insp_p insp_m, ///
	absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN)
reghdfe crash post_insp pct90 I_post_pct90 insp_p insp_m, ///
	absorb(new_VIN i.year i.month i.dayofweek i.state) vce(cluster new_VIN)


summ crash if e(sample)



*------------------------------------------------------------------------------------
* table a6 see  "2_fig_tab.do"


*-----------------------------------------------------------------------------------------------
* table A7

* (1) firm  
* Attach carrier census information and out-of-service results to the event
* panel, then estimate the post-inspection effect separately by carrier size.
local insp_varlist year month date dayofweek insp_p insp_m event_time post_insp new_VIN new_group_id inspection insp_time dot_number inspection_id insp_year
local crash_varlist crash injuries fatalities crash_event* crash_time 

use `insp_varlist' `crash_varlist' using "$Truck/processed_data/EventData/division_100pct/Insp_Crash_100pct_pre.dta", clear nolabel

* only rows with a carrier identifier can be matched
drop if dot_number == .

* merge in carrier info (driver, vehicle, cargo) (cargo later, too big)
fmerge m:1 dot_number using "CensusData/carrier_census", ///
	keepusing(tot_trucks tot_buses tot_pwr fleetsize tot_cars inter_drs intra_drs avg_tld tot_drs cdl_drs drs_lt100 drs_gt100) ///
	keep(match master) nogen

* out-of-service counts for each inspection
fmerge m:1 inspection_id insp_year using "$Truck/processed_data/InspectionData/Insp_outcome.dta", ///
	keepusing(oos_total) nogen keep(match master)

gen flag_oos = 1 if oos_total > 0 & oos_total ~= .

compress
save "$Truck/processed_data/EventData/division_100pct/InspCrash_100pct_Carrier", replace

* save memory: keep only the estimation sample
keep if flag_oos != 1 & inrange(event_time,-14,13) & crash_event_18hr != 1


* p50 in the inspection sample: indicator for being above the median of the
* non-zero values, missing where the carrier variable itself is missing
foreach v of varlist tot_pwr tot_trucks tot_buses  tot_drs cdl_drs inter_drs intra_drs avg_tld {

	summ `v' if `v' ~= 0, detail
	gen large_`v' = (`v'>r(p50)) if `v' != .

}

* subsamples, in order:
*   carrier with more than median # of trucks/buses
*   carrier with less than median # of trucks/buses
*   carrier with 1 pwr & 1 driver only
foreach subsample in "large_tot_pwr == 1" "large_tot_pwr == 0" "tot_pwr == 1 & tot_drs == 1" {

	reghdfe crash post_insp insp_p insp_m if `subsample', absorb(new_VIN i.year i.month i.dayofweek)	vce(cluster new_VIN) poolsize(5) compact nosample 

	summ crash if `subsample'

}



* Firm heterogeneity regressions written to a named text log.
* The log is closed first in case an earlier run left it open; otherwise
* log using stops with a log-already-open error on a re-run in the same session.
capture log close firm_hetero
log using "$Truck/outputs/Log/firm_redo", append text name(firm_hetero)

* All models use nosample, so e(sample) is not stored; the means below
* therefore repeat each regression condition instead of using e(sample).

* more drivers
reghdfe crash post_insp insp_p insp_m if large_tot_drs == 1, absorb(new_VIN i.year i.month i.dayofweek)	vce(cluster new_VIN) poolsize(5) compact nosample 

summ crash if large_tot_drs == 1

* less drivers
reghdfe crash post_insp insp_p insp_m if large_tot_drs == 0, absorb(new_VIN i.year i.month i.dayofweek)	vce(cluster new_VIN) poolsize(5) compact nosample 

summ crash if large_tot_drs == 0




* inter-state vs intra-state
* both inter- & intra- state business
reghdfe crash post_insp insp_p insp_m if inter_drs ~= 0 & intra_drs ~= 0 & tot_drs ~= . , absorb(new_VIN i.year i.month i.dayofweek)	vce(cluster new_VIN) poolsize(5) compact nosample 

summ crash if inter_drs ~= 0 & intra_drs ~= 0 & tot_drs ~= .

* inter-state business only
reghdfe crash post_insp insp_p insp_m if inter_drs ~= 0 & intra_drs == 0 & tot_drs ~= . , absorb(new_VIN i.year i.month i.dayofweek)	vce(cluster new_VIN) poolsize(5) compact nosample 

summ crash if inter_drs ~= 0 & intra_drs == 0 & tot_drs ~= .	

* intra-state business only
reghdfe crash post_insp insp_p insp_m if inter_drs == 0 & intra_drs ~= 0 & tot_drs ~= . , absorb(new_VIN i.year i.month i.dayofweek)	vce(cluster new_VIN) poolsize(5) compact nosample 

summ crash if inter_drs == 0 & intra_drs ~= 0 & tot_drs ~= .



* truck versus bus carrier
* Full variable names (tot_trucks, tot_buses) are used so the conditions do not
* depend on variable abbreviation being switched on.
* only bus: sample size too small
reghdfe crash post_insp insp_p insp_m if tot_trucks == 0 & tot_buses ~= 0 & tot_pwr ~= ., absorb(new_VIN i.year i.month i.dayofweek)	vce(cluster new_VIN) poolsize(5) compact nosample 

summ crash if tot_trucks == 0 & tot_buses ~= 0 & tot_pwr ~= .

* only truck
reghdfe crash post_insp insp_p insp_m if tot_trucks ~= 0 & tot_buses == 0 & tot_pwr ~= ., absorb(new_VIN i.year i.month i.dayofweek)	vce(cluster new_VIN) poolsize(5) compact nosample 

summ crash if tot_trucks ~= 0 & tot_buses == 0 & tot_pwr ~= .

* End of the firm heterogeneity output
log close firm_hetero











*-----------------------------------------------------------------------------------------------------
* table A8

* merge in inspection outcome to drop oos trucks
use  "$Truck/processed_data/EventData/division_100pct/InspCrash_100pct_Carrier_cargo2", clear

* This section saves back into the file it loads. Drop the variables it creates
* so a second run does not stop at gen flag_oos or keep stale oos_total values.
capture drop oos_total
capture drop flag_oos

fmerge m:1 inspection_id insp_year using "$Truck/processed_data/InspectionData/Insp_outcome.dta", ///
	keepusing(oos_total) nogen keep(match master)

* Flag inspections with a positive out-of-service count
gen flag_oos = 1 if oos_total > 0 & oos_total ~= .

save "$Truck/processed_data/EventData/division_100pct/InspCrash_100pct_Carrier_cargo2", replace

* save memory
drop if flag_oos == 1
keep if inrange(event_time,-14,13) & crash_event_18hr ~= 1

* select a few large categories: genfreight paperprod bldgmat beverages coldfood produce metalsheet

* The nosample option is not used below: with nosample reghdfe does not store
* e(sample), and the following summ would run on zero observations.

* general freight, chemicals, food and beverage
foreach var of varlist genfreight1 chem1 foodbeve { 
	
	reghdfe crash post_insp insp_p insp_m if `var' == 1, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN) poolsize(5) compact 

	* mean crash rate in the estimation sample
	summ crash if e(sample)
	
}

* heavy duty commodities
foreach var of varlist paperprod1 bldgmat1 metalsheet1 heavyduty { 

	reghdfe crash post_insp insp_p insp_m if `var' == 1, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN) poolsize(5) compact 
	
	* mean crash rate in the estimation sample
	summ crash if e(sample)
}




*-------------------------------------------------------------------------------------------------
* table A9
use "$Truck/processed_data/EventData/division_100pct/Insp_Crash_100pct_outcome.dta",clear

* (1) violations  

* save memory:
drop if flag_oos == 1
keep if inrange(event_time,-14,13) & crash_event_18hr ~= 1

* The reduced variable set below is temporary; the matching restore is at the
* end of this section.
preserve

keep crash insp_p insp_m event_time new_VIN year month dayofweek post_insp driver_viol_total vehicle_viol_total viol_total injuries fatalities

* 1 if the inspection recorded at least one violation, 0 otherwise
* (a missing viol_total counts as no violation)
gen viol_ind = (viol_total > 0 & viol_total ~= .)

* The nosample option is not used below: with nosample reghdfe does not store
* e(sample), and each summ would run on zero observations.

* any violations
reghdfe crash post_insp insp_p insp_m if viol_ind == 1, absorb(new_VIN i.year i.month i.dayofweek)	vce(cluster new_VIN) poolsize(5) compact 

summ crash if e(sample)

* no violations
reghdfe crash post_insp insp_p insp_m if viol_ind == 0, absorb(new_VIN i.year i.month i.dayofweek)	vce(cluster new_VIN) poolsize(5) compact 

summ crash if e(sample)

* driver violations
replace driver_viol_total = 0 if driver_viol_total == .

reghdfe crash post_insp insp_p insp_m if driver_viol_total > 0, absorb(new_VIN i.year i.month i.dayofweek)	vce(cluster new_VIN) poolsize(5) compact 

summ crash if e(sample)


* vehicle violations
replace vehicle_viol_total = 0 if vehicle_viol_total  == .

reghdfe crash post_insp insp_p insp_m if vehicle_viol_total > 0, absorb(new_VIN i.year i.month i.dayofweek)	vce(cluster new_VIN) poolsize(5) compact 

summ crash if e(sample)

* Close the preserve opened above. Left open, the next preserve in this
* do-file stops with an already-preserved error.
restore



*--------------------------------------------------------------------------------------------
* see table A10 and A11 above 





*--------------------------------------------------------------------------------------------
* table A12, A13, A14

* Load the outcome file restricted to the event window, excluding OOS
* inspections and observations flagged by crash_event_18hr
local insp_list insp_countyfips event_time new_VIN new_group_id year month dayofweek post_insp insp_m insp_p ///
	inspection inspection_id dot_number insp_unit_license insp_unit_license_state insp_unit_vehicle_id_number ///
	insp_year insp_month insp_day date insp_time  flag_oos
local crash_list crash crash_event_18hr crash_time fatalities injuries vehicles_in_accident 

use `insp_list' `crash_list' using "$Truck/processed_data/EventData/division_100pct/Insp_Crash_100pct_outcome.dta" if inrange(event_time,-14,13) & crash_event_18hr ~=1 & flag_oos~=1, clear

* Vehicle count: at least one vehicle on crash observations (zero or missing
* is set to 1), and zero on non-crash observations
replace vehicles_in_accident = 1 if inlist(vehicles_in_accident, 0, .) & crash == 1
replace vehicles_in_accident = 0 if crash == 0

* Same event-study specification for each severity outcome, followed by the
* mean of the outcome in the estimation sample
foreach outcome of varlist fatalities injuries vehicles_in_accident {

	reghdfe `outcome' post_insp insp_p insp_m, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN)  
	summ `outcome' if e(sample)

}

* merge in vehicle weight from inspection data 
* The analysis data are set aside while the yearly inspection files are read
* and stacked into one weight file keyed by inspection_id and insp_year.
preserve

foreach i of numlist 1996/2018 { 
  
	noi di "*** year `i' ***"
	
	insheet using "$Truck/processed_data/InspectionData/Insp_Pub_`i'.csv", clear
	
	keep inspection_id gross_comb_veh_wt
				
	* the year of the source file is part of the merge key
	gen insp_year = `i'

	tempfile weight`i'
	save `weight`i''
}

clear
foreach i of numlist 1996/2018 {  
	append using `weight`i''
}

compress
save "$Truck/processed_data/InspectionData/Insp_weight.dta", replace  

restore 

* Keep analysis rows only and do not leave a _merge variable behind. Without
* keep(master match) every inspection that is in the weight file but not in
* the analysis sample is appended as an extra row, which changes the weight
* summary statistics and any weight quantiles computed afterwards.
fmerge m:1 inspection_id insp_year using "$Truck/processed_data/InspectionData/Insp_weight.dta", ///
	keepusing(gross_comb_veh_wt) keep(master match) nogen

* Recorded weights of 0 and 999999 are set to missing
gen weight = gross_comb_veh_wt
replace weight = . if gross_comb_veh_wt == 0 | gross_comb_veh_wt == 999999
gstats tab weight, s(n mean sd min p5 p25 p50 p75 p95 max)


*** continuous weight measurement 

* Fatality and injury indicators; left missing where the count is missing
gen I_fatal = (fatalities > 0) if fatalities != .

gen I_injury = (injuries > 0) if injuries != .


* Severity scale: 0 = no crash, 1 = crash, 2 = injury, 3 = fatality
* (no-crash takes precedence, then fatality, then injury)
gen severity = cond(crash == 0, 0, cond(I_fatal == 1, 3, cond(I_injury == 1, 2, cond(crash == 1, 1, .))))

* Controls, fixed effects and clustering shared by every model in this part
local tail insp_p insp_m, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN)

* Post-inspection effect interacted with continuous weight
foreach y of varlist crash severity {
	reghdfe `y' i.post_insp##c.weight `tail'
	summ `y' if e(sample)
}

* 4bins
gegen weight_gp = cut(weight), group(4)

* Severity on the bin index, entered linearly
reghdfe severity i.post_insp##c.weight_gp `tail'

summ severity if e(sample)

* truncated weight: only values between 1000 and 200000 are kept
gen weight_tr = weight if inrange(weight, 1000, 200000)
gegen weight_tr_gp = cut(weight_tr), group(4)

gstats tab weight_tr if severity ~=., s(n mean sd min p25 p50 p75 max)

* Same two specifications on the truncated weight
reghdfe severity i.post_insp##c.weight_tr `tail'
reghdfe severity i.post_insp##c.weight_tr_gp `tail'

* Mean severity in the sample of the last regression
summ severity if e(sample)




* merge in weight enforcement/violation data
fmerge m:1 inspection_id insp_year using "$Truck/infra/processed_data/InspectionData/Insp_weight_comb.dta", keep(master match) keepusing(size_weight_enf weight_viol) nogen

* Indicator for at least one weight violation.
* Stata ranks missing above every number, so a bare weight_viol > 0 would
* code every row without a match (missing weight_viol) as a violation.
* Missing is coded 0 here, as for the other violation indicators in this file.
* NOTE(review): confirm that an inspection absent from Insp_weight_comb means
* no weight violation rather than unknown.
gen Iweight_viol = (weight_viol > 0 & weight_viol ~= .)

* Post-inspection effect by weight-violation status, for each outcome
foreach var of varlist crash fatalities injuries {
	reghdfe `var' i.post_insp##i.Iweight_viol insp_p insp_m, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN)  

}


*-------------------------------------------------------------------------------------
* table a15

* Count the rows of the rest-area file per county
import delimited "$Truck/processed_data/TxDOT/TXrestarea.csv",clear

gen n_rest_area = 1 
collapse (sum) n_rest_area, by(countyfips )

* Attach the county counts to the Texas inspection-crash event data
merge 1:m countyfips using "$Truck/processed_data/TxDOT_CRIS/Insp_Crash_Event_tx.dta", keep(match using) nogen

* NOTE(review): counties that are not in the rest-area file keep a missing
* n_rest_area after this merge and therefore drop out of both regressions.
* If those counties should count as having zero rest areas, set the missing
* values to 0 before estimating.

* Post-inspection effect interacted with the county rest-area count
reghdfe crash i.post_insp##c.n_rest_area insp_p insp_m if inrange(event_time,-14,13) & crash_event_18hr ~= 1, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN)  

* Same model with county-by-year fixed effects added
gegen county_yr = group(countyfips year)

reghdfe crash i.post_insp##c.n_rest_area insp_p insp_m if inrange(event_time,-14,13) & crash_event_18hr ~= 1, absorb(county_yr new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN)  

* Mean of the dependent variable of the regression above (crash)
summ crash if e(sample)



*----------------------------------------------------------------------------------
* table a16 see above





*-----------------------------------------------------------------------------------------------
* table A17
use "$Truck/processed_data/EventData/division_100pct/Insp_Crash_100pct_pre.dta", clear

* save memory 
keep if inrange(event_time,-14,13) & crash_event_18hr ~= 1

keep crash insp_p insp_m event_time new_VIN year month dayofweek post_insp inspection_id insp_year road_surface_condition weather_condition light_condition event_id* vehicles_in_accident 


* Drop inspections with a positive out-of-service count
fmerge m:1 inspection_id insp_year using "$Truck/processed_data/InspectionData/Insp_outcome.dta", ///
	keepusing(oos_total) nogen keep(match master)

gen flag_oos = 1 if oos_total > 0 & oos_total ~= .

drop if flag_oos == 1


* crash_bad: any of road surface, weather or light condition coded 2 to 8
* crash_good: all three conditions coded 1
* Both are 0 otherwise, including when the condition codes are missing
gen crash_bad = (inrange(road_surface_condition,2,8) | inrange(weather_condition,2,8) | inrange(light_condition,2,8))

gen crash_good = (road_surface_condition == 1 & weather_condition ==1 & light_condition ==1)


* crash_single: one vehicle in the accident, or any of the four event codes
* is nonmissing and different from 13 and 98
gen crash_single = (vehicles_in_accident == 1)
foreach i of numlist 1/4 {
	replace crash_single = 1 if event_id`i' ~= 13 & event_id`i' ~= 98 & event_id`i' ~= .
}


* Single-vehicle crashes split by conditions
gen crash_single_bad = (crash_single == 1 & crash_bad == 1)

gen crash_single_good = (crash_single == 1 & crash_good == 1)

* save memory
preserve 

keep crash_single_bad crash_single_good post_insp insp_p insp_m new_VIN year month dayofweek 

* The nosample option is not used below: with nosample reghdfe does not store
* e(sample), and each summ would run on zero observations.
reghdfe crash_single_bad post_insp insp_p insp_m, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN) poolsize(5) compact 

summ crash_single_bad if e(sample)


reghdfe crash_single_good post_insp insp_p insp_m, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN) poolsize(5) compact 

summ crash_single_good if e(sample)


restore



*----------------------------------------------------------------------------------
* table a18

* load outcome analysis file
local insp_list new_VIN new_group_id year month dayofweek post_insp flag_oos insp_m insp_p inspection inspection_id insp_month insp_day insp_year date insp_end_time insp_time event_time
local crash_list crash crash_event_18hr event_id1 event_id2 event_id3 event_id4 vehicles_in_accident

use `insp_list' `crash_list' using "$Truck/processed_data/EventData/division_100pct/Insp_Crash_100pct_outcome.dta" if inrange(event_time,-14,13) & crash_event_18hr ~=1 & flag_oos ~=1 , clear

* duration of inspection
* insp_end_time is split as hours = floor(x/100), minutes = remainder
gen end_h = floor(insp_end_time/100)
gen end_m = insp_end_time - end_h * 100

* mdyhms() returns milliseconds since 01jan1960. These values need double
* storage; the default float type rounds them by about a minute, which is
* large relative to the duration computed from them.
gen double end_time = mdyhms(insp_month,insp_day,insp_year,end_h,end_m,0)

* NOTE(review): assumes insp_time is a millisecond datetime on the same scale
* as end_time; verify where insp_time is built.
gen double duration = end_time - insp_time
* negative values (end time before start time) are set to missing
replace duration = . if duration < 0

* generate interaction terms
gen double postXduration = post_insp * duration

* look at single_crash
* crash_single: one vehicle in the accident, or any of the four event codes
* is nonmissing and different from 13 and 98
gen crash_single = (vehicles_in_accident == 1)
foreach i of numlist 1/4 {
	replace crash_single = 1 if event_id`i' ~= 13 & event_id`i' ~= 98 & event_id`i' ~= .
}

* The nosample option is not used below: with nosample reghdfe does not store
* e(sample), and each summ would run on zero observations.

reghdfe crash_single post_insp duration postXduration insp_p insp_m, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN) poolsize(5) compact 


* regressions: duration & start_time & time-lapse
reghdfe crash post_insp duration postXduration insp_p insp_m, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN) poolsize(5) compact 


summ crash if e(sample)
 
** hetero by short and long durations 
* duration_bin: 0 = shorter half, 1 = longer half
egen duration_bin = cut(duration), group(2) 

* The dependent variable here is crash_single, so its mean is reported
* together with the overall crash mean for the same estimation sample.
reghdfe crash_single post_insp duration postXduration insp_p insp_m if duration_bin == 0, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN) poolsize(5) compact 

summ crash crash_single if e(sample)

reghdfe crash_single post_insp duration postXduration insp_p insp_m if duration_bin == 1, absorb(new_VIN i.year i.month i.dayofweek) vce(cluster new_VIN) poolsize(5) compact 

summ crash crash_single if e(sample)



*----------------------------------------------------------------------------------
* table a19 see  "2_fig_tab.do"







